diff options
Diffstat (limited to 'libkmime/kmime_charfreq.cpp')
-rw-r--r-- | libkmime/kmime_charfreq.cpp | 176 |
1 files changed, 176 insertions, 0 deletions
diff --git a/libkmime/kmime_charfreq.cpp b/libkmime/kmime_charfreq.cpp new file mode 100644 index 000000000..ea3e42289 --- /dev/null +++ b/libkmime/kmime_charfreq.cpp @@ -0,0 +1,176 @@ +/* + kmime_charfreq.cpp + + KMime, the KDE internet mail/usenet news message library. + Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, US +*/ + +#include "kmime_charfreq.h" + +namespace KMime { + +CharFreq::CharFreq( const QByteArray & buf ) + : NUL(0), + CTL(0), + CR(0), LF(0), + CRLF(0), + printable(0), + eightBit(0), + total(0), + lineMin(0xffffffff), + lineMax(0), + mTrailingWS(false), + mLeadingFrom(false) +{ + if ( !buf.isEmpty() ) + count( buf.data(), buf.size() ); +} + +CharFreq::CharFreq( const char * buf, size_t len ) + : NUL(0), + CTL(0), + CR(0), LF(0), + CRLF(0), + printable(0), + eightBit(0), + total(0), + lineMin(0xffffffff), + lineMax(0), + mTrailingWS(false), + mLeadingFrom(false) +{ + if ( buf && len > 0 ) + count( buf, len ); +} + +static inline bool isWS( char ch ) { return ( ch == '\t' || ch == ' ' ); } + +void CharFreq::count( const char * it, size_t len ) { + + const char * end = it + len; + uint currentLineLength = 0; + // initialize the prevChar with LF so that From_ detection works w/o + // special-casing: + char prevChar = '\n'; + char prevPrevChar = 0; + + for ( ; it != end ; ++it ) { + ++currentLineLength; + switch ( *it ) { + case '\0': ++NUL; break; + case '\r': ++CR; break; + case '\n': ++LF; + if ( prevChar == '\r' ) { --currentLineLength; ++CRLF; } + if ( currentLineLength >= lineMax ) lineMax = currentLineLength-1; + if ( currentLineLength <= lineMin ) lineMin = currentLineLength-1; + if ( !mTrailingWS ) + if ( isWS( prevChar ) || ( prevChar == '\r' && isWS( prevPrevChar ) ) ) + mTrailingWS = true; + currentLineLength = 0; + break; + case 'F': // check for lines starting with From_ if not found already: + if ( !mLeadingFrom ) + if ( prevChar == '\n' && end - it >= 5 && !qstrncmp( "From ", it, 5 ) ) + mLeadingFrom = true; + ++printable; + break; + default: + { + uchar c = *it; + if ( c == '\t' || c >= ' ' && c <= '~' ) + ++printable; + else if ( c == 127 || c < ' ' ) + ++CTL; + else + ++eightBit; + } + } + prevPrevChar = prevChar; + prevChar = *it; + } + + // consider the length of the last line + if ( currentLineLength >= lineMax ) lineMax = currentLineLength; + if ( currentLineLength <= lineMin ) lineMin = currentLineLength; + + // check whether the last character is tab or space + if ( isWS( prevChar ) ) + mTrailingWS = true; + + total = len; +} + +bool CharFreq::isEightBitData() const { + return type() == EightBitData; +} + +bool CharFreq::isEightBitText() const { + return type() == EightBitText; +} + +bool CharFreq::isSevenBitData() const { + return type() == SevenBitData; +} + +bool CharFreq::isSevenBitText() const { + return type() == SevenBitText; +} + +bool CharFreq::hasTrailingWhitespace() const { + return mTrailingWS; +} + +bool CharFreq::hasLeadingFrom() const { + return mLeadingFrom; +} + +CharFreq::Type CharFreq::type() const { +#if 0 + qDebug( "Total: %d; NUL: %d; CTL: %d;\n" + "CR: %d; LF: %d; CRLF: %d;\n" + "lineMin: %d; lineMax: %d;\n" + "printable: %d; eightBit: %d;\n" + "trailing whitespace: %s;\n" + "leading 'From ': %s;\n", + total, NUL, CTL, CR, LF, CRLF, lineMin, lineMax, + printable, eightBit, + mTrailingWS ? "yes" : "no" , mLeadingFrom ? "yes" : "no" ); +#endif + if ( NUL ) // must be binary + return Binary; + + // doesn't contain NUL's: + if ( eightBit ) { + if ( lineMax > 988 ) return EightBitData; // not allowed in 8bit + if ( CR != CRLF || controlCodesRatio() > 0.2 ) return EightBitData; + return EightBitText; + } + + // doesn't contain NUL's, nor 8bit chars: + if ( lineMax > 988 ) return SevenBitData; + if ( CR != CRLF || controlCodesRatio() > 0.2 ) return SevenBitData; + + // no NUL, no 8bit chars, no excessive CTLs and no lines > 998 chars: + return SevenBitText; +} + +float CharFreq::printableRatio() const { + if ( total ) return float(printable) / float(total); + else return 0; +} + +float CharFreq::controlCodesRatio() const { + if ( total ) return float(CTL) / float(total); + else return 0; +} + +} // namespace KMime + + |