1 files changed, 466 insertions, 0 deletions
diff --git a/libkmime/kmime_parsers.cpp b/libkmime/kmime_parsers.cpp
new file mode 100644
index 000000000..181ce6772
--- /dev/null
+++ b/libkmime/kmime_parsers.cpp
@@ -0,0 +1,466 @@
+/*
+    kmime_parsers.cpp
+
+    KMime, the KDE internet mail/usenet news message library.
+    Copyright (c) 2001 the KMime authors.
+    See file AUTHORS for details
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software Foundation,
+    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+*/
+#include "kmime_parsers.h"
+
+#include <qregexp.h>
+
+using namespace KMime::Parser;
+
+namespace KMime {
+namespace Parser {
+
+
+MultiPart::MultiPart(const QCString &src, const QCString &boundary)
+{ // parse() prepends the leading "--" itself, so the boundary is stored exactly as passed in
+  this->b_oundary = boundary;
+  this->s_rc = src;
+}
+
+
+bool MultiPart::parse()
+{
+  // Splits s_rc at the lines starting with "--" + b_oundary: a preamble in front of the first
+  // boundary goes to p_reamble, the text behind the closing "--boundary--" line to e_pilouge
+  // and the pieces in between to p_arts. Returns true if at least one part was found.
+  QCString b="--"+b_oundary, part;
+  int pos1=0, pos2=0, blen=b.length();
+  p_arts.clear();
+
+  //find the first valid boundary (it has to start at the beginning of a line)
+  while(1) {
+    if( (pos1=s_rc.find(b, pos1))==-1 || pos1==0 || s_rc[pos1-1]=='\n' ) //valid boundary found or no boundary at all
+      break;
+    pos1+=blen; //boundary found but not valid => skip it;
+  }
+
+  if(pos1>-1) {
+    pos1+=blen;
+    if(s_rc[pos1]=='-' && s_rc[pos1+1]=='-') // the only valid boundary is the end-boundary - this message is *really* broken
+      pos1=-1; //we give up
+    else if( (pos1-blen)>1 ) //preamble present
+      p_reamble=s_rc.left(pos1-blen);
+  }
+
+  while(pos1>-1 && pos2>-1) {
+    //skip the rest of the boundary line - the message-part starts on the next line
+    if( (pos1=s_rc.find('\n', pos1))>-1 ) { //now search the next linebreak
+      //now find the next valid boundary
+      pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
+      while(1) {
+        if( (pos2=s_rc.find(b, pos2))==-1 || s_rc[pos2-1]=='\n' ) //valid boundary or no more boundaries found
+          break;
+        pos2+=blen; //boundary is invalid => skip it;
+      }
+      if(pos2==-1) { // no more boundaries found
+        part=s_rc.mid(pos1, s_rc.length()-pos1); //take the rest of the string
+        p_arts.append(part);
+        pos1=-1;
+        pos2=-1; //break;
+      }
+      else {
+        // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1); len is -1 for two boundary lines in a row
+        int len=pos2-pos1-1;
+        if(len<0) len=0; // mid() reads a negative length as "rest of the string" - store an empty part instead
+        part=s_rc.mid(pos1, len);
+        p_arts.append(part);
+        pos2+=blen; //pos2 points now to the first character after the boundary
+        if(s_rc[pos2]=='-' && s_rc[pos2+1]=='-') { //end-boundary
+          pos1=pos2+2; //pos1 points now to the character directly after the end-boundary
+          if( (pos1=s_rc.find('\n', pos1))>-1 ) //skip the rest of this line
+            e_pilouge=s_rc.mid(pos1+1, s_rc.length()-pos1-1); //everything after the end-boundary is considered as the epilogue
+          pos1=-1;
+          pos2=-1; //break
+        }
+        else pos1=pos2; //the search continues ...
+      }
+    }
+  }
+
+  return (!p_arts.isEmpty());
+}
+
+//============================================================================================
+
+
+NonMimeParser::NonMimeParser(const QCString &src)
+  : s_rc(src), p_artNr(-1), t_otalNr(-1) // both numbers stay -1 until a subclass parser fills them in
+{ }
+
+/**
+ * Guess the MIME type of a file from the extension of @p fileName, i.e. from the
+ * text behind its last '.', compared case-insensitively.
+ * @return the matching type or "application/octet-stream" if the name is empty,
+ *         contains no '.' or has an extension that is not listed below.
+ */
+QCString NonMimeParser::guessMimeType(const QCString& fileName)
+{
+  const QCString fallback("application/octet-stream");
+  if(fileName.isEmpty())
+    return fallback;
+
+  const int dot=fileName.findRev('.');
+  if(dot==-1)
+    return fallback;
+
+  // upper-case the extension once so that every comparison below is a plain ==
+  const QCString ext=fileName.mid(dot+1, fileName.length()-dot-1).upper();
+
+  if(ext=="JPG" || ext=="JPEG") return "image/jpeg";
+  if(ext=="GIF")                return "image/gif";
+  if(ext=="PNG")                return "image/png";
+  if(ext=="TIF" || ext=="TIFF") return "image/tiff";
+  if(ext=="XPM")                return "image/x-xpm";
+  if(ext=="XBM")                return "image/x-xbm";
+  if(ext=="BMP")                return "image/x-bmp";
+  if(ext=="HTM" || ext=="HTML") return "text/html";
+  if(ext=="TXT" || ext=="ASC" || ext=="H" ||
+     ext=="C" || ext=="CC" || ext=="CPP")
+    return "text/plain";
+
+  // no known extension matched
+  return fallback;
+}
+
+//============================================================================================
+
+
+UUEncoded::UUEncoded(const QCString &src, const QCString &subject)
+  : NonMimeParser(src), s_ubject(subject) // the subject is kept for parse(), which reads "n/m" part numbers from it
+{ }
+
+
+bool UUEncoded::parse()
+{
+  // Scans s_rc for uuencoded blocks ("begin ### name" ... "end"). Each accepted block adds one entry
+  // to f_ilenames, b_ins and m_imeTypes; the text around the blocks is collected in t_ext. A block
+  // without "begin" or "end" is only considered in the first iteration; if s_ubject is set (non-null)
+  // it must then contain "n/m", stored in p_artNr/t_otalNr. Returns true if a block was stored or isPartial().
+  int currentPos=0;
+  bool success=true, firstIteration=true;
+
+  while (success) {
+    int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0;
+    bool containsBegin=false, containsEnd=false;
+    QCString tmp,fileName;
+
+    if( (beginPos=s_rc.find(QRegExp("begin [0-9][0-9][0-9]"),currentPos))>-1 && (beginPos==0 || s_rc.at(beginPos-1)=='\n') ) {
+      containsBegin=true;
+      uuStart=s_rc.find('\n', beginPos);
+      if(uuStart==-1) {//no more line breaks found, we give up
+        success = false;
+        break;
+      } else
+        uuStart++; //points now at the beginning of the next line
+    }
+    else beginPos=currentPos;
+
+    if ( (endPos=s_rc.find("\nend",(uuStart>0)? uuStart-1:0))==-1 )
+      endPos=s_rc.length(); //no end found
+    else
+      containsEnd=true;
+
+    if ((containsBegin && containsEnd) || firstIteration) {
+      //lines of uuencoded data are expected to start with 'M': count line breaks and 'M' lines
+      for(int idx=uuStart; idx<endPos; idx++)
+        if(s_rc[idx]=='\n') {
+          lineCount++;
+          if(idx+1<endPos && s_rc[idx+1]=='M') {
+            idx++;
+            MCount++;
+          }
+        }
+      if( MCount==0 || (lineCount-MCount)>10 ||
+          ((!containsBegin || !containsEnd) && (MCount<15)) ) {  // harder check for split articles
+        success = false;
+        break; //too many "non-M-Lines" found, we give up
+      }
+
+      if( (!containsBegin || !containsEnd) && s_ubject) {  // message may be split up => parse subject
+        QRegExp rx("[0-9]+/[0-9]+");
+        pos=rx.search(QString(s_ubject), 0);
+        len=rx.matchedLength();
+        if(pos!=-1) {
+          tmp=s_ubject.mid(pos, len);
+          pos=tmp.find('/');
+          p_artNr=tmp.left(pos).toInt();
+          t_otalNr=tmp.right(tmp.length()-pos-1).toInt();
+        } else {
+          success = false;
+          break; //no "part-numbers" found in the subject, we give up
+        }
+      }
+
+      //everything before "begin" is text
+      if(beginPos>0)
+        t_ext.append(s_rc.mid(currentPos,beginPos-currentPos));
+
+      //everything between "begin ### " and the next LF is considered as the filename
+      fileName = "";
+      if(containsBegin) {
+        int nameLen=uuStart-beginPos-11; //negative for a "begin ###" line without a name
+        if(nameLen>0) //mid() would read a negative length as "up to the end of the string"
+          fileName = s_rc.mid(beginPos+10, nameLen);
+      }
+      f_ilenames.append(fileName);
+      b_ins.append(s_rc.mid(uuStart, endPos-uuStart+1)); //everything between "begin" and "end" is uuencoded
+      m_imeTypes.append(guessMimeType(fileName));
+      firstIteration=false;
+
+      int next = s_rc.find('\n', endPos+1);
+      if(next==-1) { //no more line breaks found, we give up
+        success = false;
+        break;
+      }
+      currentPos = next+1; //beginning of the next line
+    } else {
+      success = false;
+    }
+  }
+
+  // append trailing text part of the article
+  t_ext.append(s_rc.right(s_rc.length()-currentPos));
+  return ((b_ins.count()>0) || isPartial());
+}
+
+
+//============================================================================================
+
+
+YENCEncoded::YENCEncoded(const QCString &src)
+  : NonMimeParser(src) // nothing else to set up here; the base class constructor receives the source
+{ }
+
+
+bool YENCEncoded::yencMeta(QCString& src, const QCString& name, int* value)
+{
+  // Looks up the first "<name>=" in src and stores the integer of that field in *value. The field
+  // ends at the next blank, tab, CR or LF or - if none of them follows - at the end of src.
+  // Returns false, leaving *value untouched, if the key is missing or no digit follows the '='.
+  bool found = false;
+  QCString sought=name + "=";
+  int iPos=src.find( sought);
+  if (iPos>-1) {
+    int pos1=src.find(' ', iPos);
+    int pos2=src.find('\r', iPos);
+    int pos3=src.find('\t', iPos);
+    int pos4=src.find('\n', iPos);
+    if (pos2>=0 && (pos1<0 || pos1>pos2)) pos1=pos2;
+    if (pos3>=0 && (pos1<0 || pos1>pos3)) pos1=pos3;
+    if (pos4>=0 && (pos1<0 || pos1>pos4)) pos1=pos4;
+    iPos=src.findRev( '=', pos1)+1; // pos1==-1 (no delimiter) makes findRev() start at the end of src
+    if (pos1<0) pos1=src.length();  // the value then runs up to the end of src
+    if (iPos<pos1) {
+      char c=src.at( iPos);
+      if ( c>='0' && c<='9') {
+        found=true;
+        *value=src.mid( iPos, pos1-iPos).toInt();
+      }
+    }
+  }
+  return found;
+}
+
+
+bool YENCEncoded::parse()
+{ // Decodes every yEnc block ("=ybegin ..." up to "=yend ...") of s_rc; returns true if at least one block was stored in b_ins
+  int currentPos=0; // start of the part of s_rc that has not been consumed yet
+  bool success=true;
+  // one iteration per yEnc block; a malformed header, a size mismatch or a missing end line stops the loop
+  while (success) {
+    int beginPos=currentPos, yencStart=currentPos;
+    bool containsPart=false;
+    QCString fileName,mimeType; // NOTE(review): mimeType is never used in this function
+    // "=ybegin " only counts as a header if it starts a line
+    if ((beginPos=s_rc.find("=ybegin ", currentPos))>-1 && ( beginPos==0 || s_rc.at( beginPos-1)=='\n') ) {
+      yencStart=s_rc.find( '\n', beginPos);
+      if (yencStart==-1) { // no more line breaks found, give up
+        success = false;
+        break;
+      } else {
+        yencStart++;
+        if (s_rc.find("=ypart", yencStart)==yencStart) { // a "=ypart" header line follows directly
+          containsPart=true;
+          yencStart=s_rc.find( '\n', yencStart);
+          if ( yencStart== -1) {
+            success=false;
+            break;
+          }
+          yencStart++; // yencStart now points to the line behind the header line(s)
+        }
+      }
+      // Try to identify yenc meta data
+
+      // Filenames can contain any embedded chars until end of line
+      QCString meta=s_rc.mid(beginPos, yencStart-beginPos); // the complete header: one or two lines including their line breaks
+      int namePos=meta.find("name=");
+      if (namePos== -1) { // a header without "name=" is rejected
+        success=false;
+        break;
+      }
+      int eolPos=meta.find('\r', namePos);
+      if (eolPos== -1)
+      eolPos=meta.find('\n', namePos);    
+      if (eolPos== -1) {
+        success=false;
+        break;
+      }
+      fileName=meta.mid(namePos+5, eolPos-(namePos+5)); // 5 is the length of "name="
+
+      // Other metadata is integer
+      int yencLine; // value of "line="; compared below with the number of bytes decoded per line
+      if (!yencMeta(meta, "line", &yencLine)) {
+        success=false;
+        break;
+      }
+      int yencSize; // value of "size="; number of bytes the decoder below expects
+      if (!yencMeta( meta, "size", &yencSize)) {
+        success=false;
+        break;
+      }
+
+      int partBegin, partEnd; // values of "begin=" / "end="; only set if containsPart
+      if (containsPart) {
+        if (!yencMeta(meta, "part", &p_artNr)) {
+          success=false;
+          break;
+        }
+        if (!yencMeta(meta, "begin", &partBegin) || !
+             yencMeta(meta, "end", &partEnd)) {
+          success=false;
+          break;
+        }
+        if (!yencMeta(meta, "total", &t_otalNr))
+          t_otalNr=p_artNr+1; // no "total" field: fall back to part number + 1
+        if (yencSize==partEnd-partBegin+1) // "size=" equals the length of this part: treat it as the only part
+          t_otalNr=1; else
+        yencSize=partEnd-partBegin+1; // otherwise yencSize becomes the length of this part
+      }
+
+      // We have a valid yenc header; now we extract the binary data
+      int totalSize=0; // number of bytes decoded so far
+      int pos=yencStart;
+      int len=s_rc.length();
+      bool lineStart=true; // true as long as nothing of the current line has been decoded
+      int lineLength=0; // number of bytes decoded from the current line
+      bool containsEnd=false;
+      QByteArray binary = QByteArray(yencSize);
+      while (pos<len) {
+        int ch=s_rc.at(pos);
+        if (ch<0) // char may be signed: map the value to 0..255
+          ch+=256;
+        if (ch=='\r')
+        {
+          if (lineLength!=yencLine && totalSize!=yencSize) // wrong line length although data is still missing: abort
+            break;          
+          pos++;
+        }
+        else if (ch=='\n')
+        {
+          lineStart=true;
+          lineLength=0;
+          pos++;
+        }
+        else
+        {
+          if (ch=='=') // escape character: the following byte carries an extra offset of 64
+          {
+            if (pos+1<len)
+            {
+              ch=s_rc.at( pos+1);
+              if (lineStart && ch=='y') // "=y" at the start of a line is taken as the "=yend" line
+              {
+                containsEnd=true;
+                break;
+              }
+              pos+=2;
+              ch-=64+42;
+              if (ch<0)
+                ch+=256;
+              if (totalSize>=yencSize) // more data than announced in the header
+                break;            
+              binary.at(totalSize++)=ch;
+              lineLength++;
+            }
+            else            
+              break;            
+          }
+          else
+          {
+            ch-=42; // plain byte: remove the offset of 42
+            if (ch<0)
+              ch+=256;
+            if (totalSize>=yencSize) // more data than announced in the header
+              break;
+            binary.at(totalSize++)=ch;
+            lineLength++;
+            pos++;
+          }
+          lineStart=false;
+        }
+      }
+      
+      if (!containsEnd)
+      {
+        success=false;
+        break;
+      }
+      if (totalSize!=yencSize) // fewer bytes decoded than announced in the header
+      {        
+        success=false;
+        break;
+      }
+
+      // pos now points to =yend; get end data
+      eolPos=s_rc.find('\n', pos);
+      if (eolPos== -1)
+      {
+        success=false;
+        break;
+      }
+      meta=s_rc.mid(pos, eolPos-pos); // the "=yend" line without its line break
+      if (!yencMeta(meta, "size", &totalSize)) // totalSize is reused for the "size=" of the end line
+      {        
+        success=false;
+        break;
+      }
+      if (totalSize!=yencSize) // the size given in the end line has to match as well
+      {        
+        success=false;
+        break;
+      }
+
+      f_ilenames.append(fileName);
+      m_imeTypes.append(guessMimeType( fileName));
+      b_ins.append(binary);
+
+      //everything before "begin" is text
+      if(beginPos>0)
+        t_ext.append(s_rc.mid(currentPos,beginPos-currentPos));
+      currentPos = eolPos+1; // continue behind the "=yend" line
+
+    } else {
+      success = false;
+    }
+  }
+
+  // append trailing text part of the article
+  t_ext.append(s_rc.right(s_rc.length()-currentPos));
+
+  return b_ins.count()>0;
+}
+
+} // namespace Parser
+} // namespace KMime