diff options
Diffstat (limited to 'libksieve/parser/parser.cpp')
-rw-r--r-- | libksieve/parser/parser.cpp | 651 |
1 files changed, 651 insertions, 0 deletions
diff --git a/libksieve/parser/parser.cpp b/libksieve/parser/parser.cpp new file mode 100644 index 000000000..8c2db050e --- /dev/null +++ b/libksieve/parser/parser.cpp @@ -0,0 +1,651 @@ +/* -*- c++ -*- + parser/parser.cpp + + This file is part of KSieve, + the KDE internet mail/usenet news message filtering library. + Copyright (c) 2002-2003 Marc Mutz <mutz@kde.org> + + KSieve is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License, version 2, as + published by the Free Software Foundation. + + KSieve is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. +*/ + +#include <config.h> + +#include <ksieve/parser.h> +#include <impl/parser.h> + +#include <ksieve/error.h> + +#include <qstring.h> + +#include <assert.h> +#include <limits.h> // ULONG_MAX +#include <ctype.h> // isdigit + +namespace KSieve { + + // + // + // Parser Bridge implementation + // + // + + Parser::Parser( const char * scursor, const char * const send, int options ) + : i( 0 ) + { + i = new Impl( scursor, send, options ); + } + + Parser::~Parser() { + delete i; i = 0; + } + + void Parser::setScriptBuilder( ScriptBuilder * builder ) { + assert( i ); + i->mBuilder = builder; + } + + ScriptBuilder * Parser::scriptBuilder() const { + assert( i ); + return i->mBuilder; + } + + const Error & Parser::error() const { + assert( i ); + return i->error(); + } + + bool Parser::parse() { + assert( i ); + return i->parse(); + } + +} + +static inline unsigned long factorForQuantifier( char ch ) { + switch ( ch ) { + case 'g': + case 'G': + return 1024*1024*1024; + case 'm': + case 'M': + return 1024*1024; + case 'k': + case 'K': + return 1024; + default: + assert( 0 ); // lexer should prohibit this + return 1; // make compiler happy + } +} + +static inline bool willOverflowULong( unsigned long result, unsigned long add ) { + static const unsigned long maxULongByTen = (unsigned long)(ULONG_MAX / 10.0) ; + return result > maxULongByTen || ULONG_MAX - 10 * result < add ; +} + +namespace KSieve { + + // + // + // Parser Implementation + // + // + + Parser::Impl::Impl( const char * scursor, const char * const send, int options ) + : mToken( Lexer::None ), + lexer( scursor, send, options ), + mBuilder( 0 ) + { + + } + + bool Parser::Impl::isStringToken() const { + return token() == Lexer::QuotedString || + token() == Lexer::MultiLineString ; + } + + + bool Parser::Impl::isArgumentToken() const { + return isStringToken() || + token() == Lexer::Number || + token() == Lexer::Tag || + token() == Lexer::Special && mTokenValue == "[" ; + } + + bool Parser::Impl::obtainToken() { + while ( !mToken && !lexer.atEnd() && !lexer.error() ) { + mToken = lexer.nextToken( mTokenValue ); + if ( lexer.error() ) + break; + // comments and line feeds are semantically invisible and may + // appear anywhere, so we handle them here centrally: + switch ( token() ) { + case Lexer::HashComment: + if ( scriptBuilder() ) + scriptBuilder()->hashComment( tokenValue() ); + consumeToken(); + break; + case Lexer::BracketComment: + if ( scriptBuilder() ) + scriptBuilder()->bracketComment( tokenValue() ); + consumeToken(); + break; + case Lexer::LineFeeds: + for ( unsigned int i = 0, end = tokenValue().toUInt() ; i < end ; ++i ) + if ( scriptBuilder() ) // better check every iteration, b/c + // we call out to ScriptBuilder, + // where nasty things might happen! + scriptBuilder()->lineFeed(); + consumeToken(); + break; + default: ; // make compiler happy + } + } + if ( lexer.error() && scriptBuilder() ) + scriptBuilder()->error( lexer.error() ); + return !lexer.error(); + } + + bool Parser::Impl::parse() { + // this is the entry point: START := command-list + if ( !parseCommandList() ) + return false; + if ( !atEnd() ) { + makeUnexpectedTokenError( Error::ExpectedCommand ); + return false; + } + if ( scriptBuilder() ) + scriptBuilder()->finished(); + return true; + } + + + bool Parser::Impl::parseCommandList() { + // our ABNF: + // command-list := *comand + + while ( !atEnd() ) { + if ( !obtainToken() ) + return false; + if ( token() == Lexer::None ) + continue; + if ( token() != Lexer::Identifier ) + return true; + if ( !parseCommand() ) { + assert( error() ); + return false; + } + } + return true; + } + + + bool Parser::Impl::parseCommand() { + // command := identifier arguments ( ";" / block ) + // arguments := *argument [ test / test-list ] + // block := "{" *command "}" + // our ABNF: + // block := "{" [ command-list ] "}" + + if ( atEnd() ) + return false; + + // + // identifier + // + + if ( !obtainToken() || token() != Lexer::Identifier ) + return false; + + if ( scriptBuilder() ) + scriptBuilder()->commandStart( tokenValue() ); + consumeToken(); + + // + // *argument + // + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) { + makeError( Error::MissingSemicolonOrBlock ); + return false; + } + + if ( isArgumentToken() && !parseArgumentList() ) { + assert( error() ); + return false; + } + + // + // test / test-list + // + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) { + makeError( Error::MissingSemicolonOrBlock ); + return false; + } + + if ( token() == Lexer::Special && tokenValue() == "(" ) { // test-list + if ( !parseTestList() ) { + assert( error() ); + return false; + } + } else if ( token() == Lexer::Identifier ) { // should be test: + if ( !parseTest() ) { + assert( error() ); + return false; + } + } + + // + // ";" / block + // + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) { + makeError( Error::MissingSemicolonOrBlock ); + return false; + } + + if ( token() != Lexer::Special ) { + makeUnexpectedTokenError( Error::ExpectedBlockOrSemicolon ); + return false; + } + + if ( tokenValue() == ";" ) + consumeToken(); + else if ( tokenValue() == "{" ) { // block + if ( !parseBlock() ) + return false; // it's an error since we saw '{' + } else { + makeError( Error::MissingSemicolonOrBlock ); + return false; + } + + if ( scriptBuilder() ) + scriptBuilder()->commandEnd(); + return true; + } + + + bool Parser::Impl::parseArgumentList() { + // our ABNF: + // argument-list := *argument + + while ( !atEnd() ) { + if ( !obtainToken() ) + return false; + if ( !isArgumentToken() ) + return true; + if ( !parseArgument() ) + return !error(); + } + return true; + } + + + bool Parser::Impl::parseArgument() { + // argument := string-list / number / tag + + if ( !obtainToken() || atEnd() ) + return false; + + if ( token() == Lexer::Number ) { + if ( !parseNumber() ) { + assert( error() ); + return false; + } + return true; + } else if ( token() == Lexer::Tag ) { + if ( scriptBuilder() ) + scriptBuilder()->taggedArgument( tokenValue() ); + consumeToken(); + return true; + } else if ( isStringToken() ) { + if ( scriptBuilder() ) + scriptBuilder()->stringArgument( tokenValue(), token() == Lexer::MultiLineString, QString::null ); + consumeToken(); + return true; + } else if ( token() == Lexer::Special && tokenValue() == "[" ) { + if ( !parseStringList() ) { + assert( error() ); + return false; + } + return true; + } + + return false; + } + + + bool Parser::Impl::parseTestList() { + // test-list := "(" test *("," test) ")" + + if ( !obtainToken() || atEnd() ) + return false; + + if ( token() != Lexer::Special || tokenValue() != "(" ) + return false; + if ( scriptBuilder() ) + scriptBuilder()->testListStart(); + consumeToken(); + + // generic while/switch construct for comma-separated lists. See + // parseStringList() for another one. Any fix here is like to apply there, too. + bool lastWasComma = true; + while ( !atEnd() ) { + if ( !obtainToken() ) + return false; + + switch ( token() ) { + case Lexer::None: + break; + case Lexer::Special: + assert( tokenValue().length() == 1 ); + assert( tokenValue()[0].latin1() ); + switch ( tokenValue()[0].latin1() ) { + case ')': + consumeToken(); + if ( lastWasComma ) { + makeError( Error::ConsecutiveCommasInTestList ); + return false; + } + if ( scriptBuilder() ) + scriptBuilder()->testListEnd(); + return true; + case ',': + consumeToken(); + if( lastWasComma ) { + makeError( Error::ConsecutiveCommasInTestList ); + return false; + } + lastWasComma = true; + break; + default: + makeError( Error::NonStringInStringList ); + return false; + } + break; + + case Lexer::Identifier: + if ( !lastWasComma ) { + makeError( Error::MissingCommaInTestList ); + return false; + } else { + lastWasComma = false; + if ( !parseTest() ) { + assert( error() ); + return false; + } + } + break; + + default: + makeUnexpectedTokenError( Error::NonTestInTestList ); + return false; + } + } + + makeError( Error::PrematureEndOfTestList ); + return false; + } + + + bool Parser::Impl::parseTest() { + // test := identifier arguments + // arguments := *argument [ test / test-list ] + + // + // identifier + // + + if ( !obtainToken() || atEnd() ) + return false; + + if ( token() != Lexer::Identifier ) + return false; + + if ( scriptBuilder() ) + scriptBuilder()->testStart( tokenValue() ); + consumeToken(); + + // + // *argument + // + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) // a test w/o args + goto TestEnd; + + if ( isArgumentToken() && !parseArgumentList() ) { + assert( error() ); + return false; + } + + // + // test / test-list + // + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) // a test w/o nested tests + goto TestEnd; + + if ( token() == Lexer::Special && tokenValue() == "(" ) { // test-list + if ( !parseTestList() ) { + assert( error() ); + return false; + } + } else if ( token() == Lexer::Identifier ) { // should be test: + if ( !parseTest() ) { + assert( error() ); + return false; + } + } + + TestEnd: + if ( scriptBuilder() ) + scriptBuilder()->testEnd(); + return true; + } + + + bool Parser::Impl::parseBlock() { + // our ABNF: + // block := "{" [ command-list ] "}" + + if ( !obtainToken() || atEnd() ) + return false; + + if ( token() != Lexer::Special || tokenValue() != "{" ) + return false; + if ( scriptBuilder() ) + scriptBuilder()->blockStart(); + consumeToken(); + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) { + makeError( Error::PrematureEndOfBlock ); + return false; + } + + if ( token() == Lexer::Identifier ) { + if ( !parseCommandList() ) { + assert( error() ); + return false; + } + } + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) { + makeError( Error::PrematureEndOfBlock ); + return false; + } + + if ( token() != Lexer::Special || tokenValue() != "}" ) { + makeError( Error::NonCommandInCommandList ); + return false; + } + if ( scriptBuilder() ) + scriptBuilder()->blockEnd(); + consumeToken(); + return true; + } + + bool Parser::Impl::parseStringList() { + // string-list := "[" string *("," string) "]" / string + // ;; if there is only a single string, the brackets are optional + // + // However, since strings are already handled separately from + // string lists in parseArgument(), our ABNF is modified to: + // string-list := "[" string *("," string) "]" + + if ( !obtainToken() || atEnd() ) + return false; + + if ( token() != Lexer::Special || tokenValue() != "[" ) + return false; + + if ( scriptBuilder() ) + scriptBuilder()->stringListArgumentStart(); + consumeToken(); + + // generic while/switch construct for comma-separated lists. See + // parseTestList() for another one. Any fix here is like to apply there, too. + bool lastWasComma = true; + while ( !atEnd() ) { + if ( !obtainToken() ) + return false; + + switch ( token() ) { + case Lexer::None: + break; + case Lexer::Special: + assert( tokenValue().length() == 1 ); + switch ( tokenValue()[0].latin1() ) { + case ']': + consumeToken(); + if ( lastWasComma ) { + makeError( Error::ConsecutiveCommasInStringList ); + return false; + } + if ( scriptBuilder() ) + scriptBuilder()->stringListArgumentEnd(); + return true; + case ',': + consumeToken(); + if ( lastWasComma ) { + makeError( Error::ConsecutiveCommasInStringList ); + return false; + } + lastWasComma = true; + break; + default: + makeError( Error::NonStringInStringList ); + return false; + } + break; + + case Lexer::QuotedString: + case Lexer::MultiLineString: + if ( !lastWasComma ) { + makeError( Error::MissingCommaInStringList ); + return false; + } + lastWasComma = false; + if ( scriptBuilder() ) + scriptBuilder()->stringListEntry( tokenValue(), token() == Lexer::MultiLineString, QString::null ); + consumeToken(); + break; + + default: + makeError( Error::NonStringInStringList ); + return false; + } + } + + makeError( Error::PrematureEndOfStringList ); + return false; + } + + bool Parser::Impl::parseNumber() { + // The lexer returns the number including the quantifier as a + // single token value. Here, we split is an check that the number + // is not out of range: + + if ( !obtainToken() || atEnd() ) + return false; + + if ( token() != Lexer::Number ) + return false; + + // number: + unsigned long result = 0; + unsigned int i = 0; + const QCString s = tokenValue().latin1(); + for ( const unsigned int len = s.length() ; i < len && isdigit( s[i] ) ; ++i ) { + const unsigned long digitValue = s[i] - '0' ; + if ( willOverflowULong( result, digitValue ) ) { + makeError( Error::NumberOutOfRange ); + return false; + } else { + result *= 10 ; result += digitValue ; + } + } + + // optional quantifier: + char quantifier = '\0'; + if ( i < s.length() ) { + assert( i + 1 == s.length() ); + quantifier = s[i]; + const unsigned long factor = factorForQuantifier( quantifier ); + if ( result > double(ULONG_MAX) / double(factor) ) { + makeError( Error::NumberOutOfRange ); + return false; + } + result *= factor; + } + + if ( scriptBuilder() ) + scriptBuilder()->numberArgument( result, quantifier ); + consumeToken(); + return true; + } + +} // namespace KSieve |