summary refs log tree commit diff stats
path: root/libkmime/kmime_charfreq.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libkmime/kmime_charfreq.cpp')
-rw-r--r-- libkmime/kmime_charfreq.cpp 176
1 files changed, 176 insertions, 0 deletions
diff --git a/libkmime/kmime_charfreq.cpp b/libkmime/kmime_charfreq.cpp
new file mode 100644
index 000000000..ea3e42289
--- /dev/null
+++ b/libkmime/kmime_charfreq.cpp
@@ -0,0 +1,176 @@
+/*
+ kmime_charfreq.cpp
+
+ KMime, the KDE internet mail/usenet news message library.
+ Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+*/
+
+#include "kmime_charfreq.h"
+
+namespace KMime {
+
+/**
+  Creates the statistics object for the bytes held in @p buf.
+  All counters start at zero (lineMin at 0xffffffff); an empty array
+  is not scanned, so those initial values are kept.
+*/
+CharFreq::CharFreq( const QByteArray & buf )
+  : NUL( 0 ),
+    CTL( 0 ),
+    CR( 0 ),
+    LF( 0 ),
+    CRLF( 0 ),
+    printable( 0 ),
+    eightBit( 0 ),
+    total( 0 ),
+    lineMin( 0xffffffff ),
+    lineMax( 0 ),
+    mTrailingWS( false ),
+    mLeadingFrom( false )
+{
+  // nothing to analyse
+  if ( buf.isEmpty() )
+    return;
+
+  count( buf.data(), buf.size() );
+}
+
+/**
+  Creates the statistics object for the @p len bytes starting at @p buf.
+  All counters start at zero (lineMin at 0xffffffff); a null pointer or
+  a zero length is not scanned, so those initial values are kept.
+*/
+CharFreq::CharFreq( const char * buf, size_t len )
+  : NUL( 0 ),
+    CTL( 0 ),
+    CR( 0 ),
+    LF( 0 ),
+    CRLF( 0 ),
+    printable( 0 ),
+    eightBit( 0 ),
+    total( 0 ),
+    lineMin( 0xffffffff ),
+    lineMax( 0 ),
+    mTrailingWS( false ),
+    mLeadingFrom( false )
+{
+  // nothing to analyse
+  if ( !buf || len == 0 )
+    return;
+
+  count( buf, len );
+}
+
+// Tells whether ch is one of the two blank characters: horizontal tab or space.
+static inline bool isWS( char ch ) {
+  switch ( ch ) {
+  case ' ':
+  case '\t':
+    return true;
+  default:
+    return false;
+  }
+}
+
+/**
+  Scans the @p len bytes starting at @p it and fills in the counters.
+
+  Per byte it updates NUL, CR, LF, CRLF, CTL, printable and eightBit.
+  Per line it updates lineMin and lineMax, where a line's length does
+  not include the terminating LF or CRLF. It also records whether any
+  line ends in tab/space (mTrailingWS) and whether any line starts with
+  "From " (mLeadingFrom). Finally total is set to @p len.
+
+  @param it   first byte to inspect
+  @param len  number of bytes to inspect
+*/
+void CharFreq::count( const char * it, size_t len ) {
+
+  const char * const end = it + len;
+  uint currentLineLength = 0;
+  // initialize the prevChar with LF so that From_ detection works w/o
+  // special-casing:
+  char prevChar = '\n';
+  char prevPrevChar = 0;
+
+  for ( ; it != end ; ++it ) {
+    ++currentLineLength;
+    switch ( *it ) {
+    case '\0': ++NUL; break;
+    case '\r': ++CR; break;
+    case '\n':
+      {
+        ++LF;
+        // a CR directly in front of the LF is part of the line terminator:
+        if ( prevChar == '\r' ) { --currentLineLength; ++CRLF; }
+        // The LF itself has been counted above, so take it off again.
+        // Compare the stripped length, not the raw counter: testing
+        // the raw counter with >= made lineMax shrink by one whenever
+        // a later line's raw count equalled the current maximum.
+        const uint lineLength = currentLineLength - 1;
+        if ( lineLength > lineMax ) lineMax = lineLength;
+        if ( lineLength < lineMin ) lineMin = lineLength;
+        // whitespace right before LF or before CRLF is trailing whitespace:
+        if ( !mTrailingWS &&
+             ( isWS( prevChar ) ||
+               ( prevChar == '\r' && isWS( prevPrevChar ) ) ) )
+          mTrailingWS = true;
+        currentLineLength = 0;
+      }
+      break;
+    case 'F': // check for lines starting with From_ if not found already:
+      if ( !mLeadingFrom )
+        if ( prevChar == '\n' && end - it >= 5 && !qstrncmp( "From ", it, 5 ) )
+          mLeadingFrom = true;
+      ++printable;
+      break;
+    default:
+      {
+        const uchar c = *it;
+        if ( c == '\t' || ( c >= ' ' && c <= '~' ) )
+          ++printable;
+        else if ( c == 127 || c < ' ' )
+          ++CTL;
+        else
+          ++eightBit;
+      }
+    }
+    prevPrevChar = prevChar;
+    prevChar = *it;
+  }
+
+  // consider the length of the last line (this is 0 when the data
+  // ends in LF, because the counter was reset there)
+  if ( currentLineLength >= lineMax ) lineMax = currentLineLength;
+  if ( currentLineLength <= lineMin ) lineMin = currentLineLength;
+
+  // check whether the last character is tab or space
+  if ( isWS( prevChar ) )
+    mTrailingWS = true;
+
+  total = len;
+}
+
+/** Returns true if type() classifies the data as EightBitData. */
+bool CharFreq::isEightBitData() const
+{
+  const Type detected = type();
+  return detected == EightBitData;
+}
+
+/** Returns true if type() classifies the data as EightBitText. */
+bool CharFreq::isEightBitText() const
+{
+  const Type detected = type();
+  return detected == EightBitText;
+}
+
+/** Returns true if type() classifies the data as SevenBitData. */
+bool CharFreq::isSevenBitData() const
+{
+  const Type detected = type();
+  return detected == SevenBitData;
+}
+
+/** Returns true if type() classifies the data as SevenBitText. */
+bool CharFreq::isSevenBitText() const
+{
+  const Type detected = type();
+  return detected == SevenBitText;
+}
+
+/** Returns the stored trailing-whitespace flag (mTrailingWS). */
+bool CharFreq::hasTrailingWhitespace() const
+{
+  return mTrailingWS;
+}
+
+/** Returns the stored leading-"From " flag (mLeadingFrom). */
+bool CharFreq::hasLeadingFrom() const
+{
+  return mLeadingFrom;
+}
+
+/**
+  Classifies the data from the collected counters.
+
+  - Binary:        at least one NUL byte was seen.
+  - EightBitData / SevenBitData: no NUL, but the longest line exceeds
+    988 characters, or some CR is not part of a CRLF pair, or more than
+    20% of all bytes are control codes.
+  - EightBitText / SevenBitText: none of the above.
+
+  The Eight/Seven distinction depends on whether any byte above the
+  7-bit range was counted (eightBit != 0).
+*/
+CharFreq::Type CharFreq::type() const {
+#if 0
+  qDebug( "Total: %d; NUL: %d; CTL: %d;\n"
+          "CR: %d; LF: %d; CRLF: %d;\n"
+          "lineMin: %d; lineMax: %d;\n"
+          "printable: %d; eightBit: %d;\n"
+          "trailing whitespace: %s;\n"
+          "leading 'From ': %s;\n",
+          total, NUL, CTL, CR, LF, CRLF, lineMin, lineMax,
+          printable, eightBit,
+          mTrailingWS ? "yes" : "no" , mLeadingFrom ? "yes" : "no" );
+#endif
+  // a NUL byte anywhere means binary, regardless of everything else
+  if ( NUL )
+    return Binary;
+
+  // Conditions that rule out "text"; they are the same for the 7bit
+  // and the 8bit case and are evaluated left to right.
+  // NOTE(review): 988 is below the 998-character line limit of
+  // RFC 2045 / RFC 5322 - confirm whether this margin is intentional.
+  const bool dataLike =
+    lineMax > 988 || CR != CRLF || controlCodesRatio() > 0.2;
+
+  if ( eightBit )
+    return dataLike ? EightBitData : EightBitText;
+
+  return dataLike ? SevenBitData : SevenBitText;
+}
+
+/**
+  Returns the share of printable characters among all counted bytes,
+  or 0 when nothing has been counted.
+*/
+float CharFreq::printableRatio() const
+{
+  // avoid dividing by zero
+  if ( !total )
+    return 0;
+
+  return float(printable) / float(total);
+}
+
+/**
+  Returns the share of control codes (CTL) among all counted bytes,
+  or 0 when nothing has been counted.
+*/
+float CharFreq::controlCodesRatio() const
+{
+  // avoid dividing by zero
+  if ( !total )
+    return 0;
+
+  return float(CTL) / float(total);
+}
+
+} // namespace KMime
+
+