diff options
Diffstat (limited to 'lib/cppparser/lexer.h')
-rw-r--r-- | lib/cppparser/lexer.h | 867 |
1 files changed, 867 insertions, 0 deletions
diff --git a/lib/cppparser/lexer.h b/lib/cppparser/lexer.h new file mode 100644 index 00000000..8b3f57ed --- /dev/null +++ b/lib/cppparser/lexer.h @@ -0,0 +1,867 @@ +/* This file is part of KDevelop + Copyright (C) 2002,2003 Roberto Raggi <roberto@kdevelop.org> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. +*/ + +#ifndef LEXER_H +#define LEXER_H + +#include "driver.h" + +#include <qstring.h> +#include <qmap.h> +#include <qvaluestack.h> +#include <qpair.h> +#include <qptrvector.h> +#include <hashedstring.h> +#include <ext/hash_map> + +#define CHARTYPE QChar + +enum Type { + Token_eof = 0, + Token_identifier = 1000, + Token_number_literal, + Token_char_literal, + Token_string_literal, + Token_whitespaces, + Token_comment, + Token_preproc, + + Token_assign = 2000, + Token_ptrmem, + Token_ellipsis, + Token_scope, + Token_shift, + Token_eq, + Token_leq, + Token_geq, + Token_incr, + Token_decr, + Token_arrow, + + Token_concat, + + Token_K_DCOP, + Token_k_dcop, + Token_k_dcop_signals, + + Token_Q_OBJECT, + Token_signals, + Token_slots, + Token_emit, + Token_foreach, // qt4 [erbsland] + + Token_and, + Token_and_eq, + Token_asm, + Token_auto, + Token_bitand, + Token_bitor, + Token_bool, + Token_break, + Token_case, + Token_catch, + Token_char, + Token_class, + Token_compl, + Token_const, + Token_const_cast, + Token_continue, + Token_default, + Token_delete, + Token_do, + Token_double, + Token_dynamic_cast, + Token_else, + Token_enum, + Token_explicit, + Token_export, + Token_extern, + Token_false, + Token_float, + Token_for, + Token_friend, + Token_goto, + Token_if, + Token_inline, + Token_int, + Token_long, + Token_mutable, + Token_namespace, + Token_new, + Token_not, + Token_not_eq, + Token_operator, + Token_or, + Token_or_eq, + Token_private, + Token_protected, + Token_public, + Token_register, + Token_reinterpret_cast, + Token_return, + Token_short, + Token_signed, + Token_sizeof, + Token_static, + Token_static_cast, + Token_struct, + Token_switch, + Token_template, + Token_this, + Token_throw, + Token_true, + Token_try, + Token_typedef, + Token_typeid, + Token_typename, + Token_union, + Token_unsigned, + Token_using, + Token_virtual, + Token_void, + Token_volatile, + Token_wchar_t, + Token_while, + Token_xor, + Token_xor_eq +}; + +enum SkipType { + SkipWord, + SkipWordAndArguments +}; + +struct LexerData; + +class Token +{ + Token(const QString &); + Token( int type, int position, int length, const QString& text ); + Token( const Token& source ); + + Token& operator = ( const Token& source ); + bool operator == ( const Token& token ) const; + operator int () const; + +public: + bool isNull() const; + + int type() const; + void setType( int type ); + + void getStartPosition( int* line, int* column ) const; + void setStartPosition( int line, int column ); + void getEndPosition( int* line, int* column ) const; + void setEndPosition( int line, int column ); + + unsigned int length() const; + void setLength( unsigned int length ); + + int position() const; + void setPosition( int position ); + + QString text() const; + +private: + int m_type; + int m_position; + int m_length; + int m_startLine; + int m_startColumn; + int m_endLine; + int m_endColumn; + const QString & m_text; + + friend class Lexer; + friend class Parser; +}; // class Token + +class Lexer +{ +public: + Lexer( Driver* driver ); + ~Lexer(); + + bool recordComments() const; + void setRecordComments( bool record ); + + bool recordWhiteSpaces() const; + void setRecordWhiteSpaces( bool record ); + + bool reportWarnings() const; + void setReportWarnings( bool enable ); + + bool reportMessages() const; + void setReportMessages( bool enable ); + + bool skipWordsEnabled() const; + void setSkipWordsEnabled( bool enabled ); + + bool preprocessorEnabled() const; + void setPreprocessorEnabled( bool enabled ); + + void resetSkipWords(); + void addSkipWord( const QString& word, SkipType skipType=SkipWord, const QString& str = QString::null ); + + QString source() const; + void setSource( const QString& source ); + + int index() const; + void setIndex( int index ); + + //returns the count of lines that wer skipped due to #ifdef's + int skippedLines() const; + + void reset(); + + const Token& tokenAt( int position ) const; + const Token& nextToken(); + const Token& lookAhead( int n ) const; + + static int toInt( const Token& token ); + + int tokenPosition( const Token& token ) const; + void getTokenPosition( const Token& token, int* line, int* col ); + + int currentLine() const { return m_currentLine; } + int currentColumn() const { return m_currentColumn; } + + inline const CHARTYPE* offset( int offset ) const { + return m_source.unicode() + offset; + } + + inline int getOffset( const QChar* p ) const { + return int(p - (m_source.unicode())); + } + +private: + void setEndPtr( const QChar* c ) { + m_endPtr = c; + if( m_ptr < m_endPtr ) + m_currentChar = *m_ptr; + else + m_currentChar = QChar::null; + } + const QChar currentChar() const; + QChar peekChar( int n=1 ) const; + int currentPosition() const; + + void insertCurrent( const QString& str ); + + void tokenize(); + void nextToken( Token& token, bool stopOnNewline=false ); + void nextChar(); + void nextChar( int n ); + void skip( int l, int r ); + void readIdentifier(); + void readWhiteSpaces( bool skipNewLine=true, bool skipOnlyOnce=false ); + void readLineComment(); + void readMultiLineComment(); + void readCharLiteral(); + void readStringLiteral(); + void readNumberLiteral(); + + int findOperator3() const; + int findOperator2() const; + bool eof() const; + + // preprocessor (based on an article of Al Stevens on Dr.Dobb's journal) + int testIfLevel(); + int macroDefined(); + QString readArgument(); + + int macroPrimary(); + int macroMultiplyDivide(); + int macroAddSubtract(); + int macroRelational(); + int macroEquality(); + int macroBoolAnd(); + int macroBoolXor(); + int macroBoolOr(); + int macroLogicalAnd(); + int macroLogicalOr(); + int macroExpression(); + + void handleDirective( const QString& directive ); + void processDefine( Macro& macro ); + void processElse(); + void processElif(); + void processEndif(); + void processIf(); + void processIfdef(); + void processIfndef(); + void processInclude(); + void processUndef(); + +private: + LexerData* d; + Driver* m_driver; + QPtrVector< Token > m_tokens; + int m_size; + int m_index; + QString m_source; + const QChar* m_ptr; + const QChar* m_endPtr; + QChar m_currentChar; + bool m_recordComments; + bool m_recordWhiteSpaces; + bool m_startLine; + __gnu_cxx::hash_map< HashedString, QPair<SkipType, QString> > m_words; + + + int m_skippedLines; + int m_currentLine; + int m_currentColumn; + bool m_skipWordsEnabled; + + // preprocessor + QMemArray<bool> m_skipping; + QMemArray<bool> m_trueTest; + int m_ifLevel; + bool m_preprocessorEnabled; + bool m_inPreproc; + + bool m_reportWarnings; + bool m_reportMessages; + +private: + Lexer( const Lexer& source ); + void operator = ( const Lexer& source ); +}; + + +inline Token::Token(const QString & text = "") + : m_type( -1 ), + m_position( 0 ), + m_length( 0 ), + m_text( text ) +{ +} + +inline Token::Token( int type, int position, int length, const QString& text ) + : m_type( type ), + m_position( position ), + m_length( length ), + m_text( text ) +{ +} + +inline Token::Token( const Token& source ) + : m_type( source.m_type ), + m_position( source.m_position ), + m_length( source.m_length ), + m_startLine( source.m_startLine ), + m_startColumn( source.m_startColumn ), + m_endLine( source.m_endLine ), + m_endColumn( source.m_endColumn ), + m_text( source.m_text ) +{ +} + +inline Token& Token::operator = ( const Token& source ) +{ + m_type = source.m_type; + m_position = source.m_position; + m_length = source.m_length; + m_startLine = source.m_startLine; + m_startColumn = source.m_startColumn; + m_endLine = source.m_endLine; + m_endColumn = source.m_endColumn; +// m_text = source.m_text; + return( *this ); +} + +inline Token::operator int () const +{ + return m_type; +} + +inline bool Token::operator == ( const Token& token ) const +{ + return m_type == token.m_type && + m_position == token.m_position && + m_length == token.m_length && + m_startLine == token.m_startLine && + m_startColumn == token.m_startColumn && + m_endLine == token.m_endLine && + m_endColumn == token.m_endColumn && + m_text == token.m_text; +} + +inline bool Token::isNull() const +{ + return m_type == Token_eof || m_length == 0; +} + +inline int Token::type() const +{ + return m_type; +} + +inline void Token::setType( int type ) +{ + m_type = type; +} + +inline int Token::position() const +{ + return m_position; +} + +inline QString Token::text() const +{ + return m_text.mid(m_position, m_length); +} + +inline void Token::setStartPosition( int line, int column ) +{ + m_startLine = line; + m_startColumn = column; +} + +inline void Token::setEndPosition( int line, int column ) +{ + m_endLine = line; + m_endColumn = column; +} + +inline void Token::getStartPosition( int* line, int* column ) const +{ + if( line ) *line = m_startLine; + if( column ) *column = m_startColumn; +} + +inline void Token::getEndPosition( int* line, int* column ) const +{ + if( line ) *line = m_endLine; + if( column ) *column = m_endColumn; +} + +inline void Token::setPosition( int position ) +{ + m_position = position; +} + +inline unsigned int Token::length() const +{ + return m_length; +} + +inline void Token::setLength( unsigned int length ) +{ + m_length = length; +} + +inline bool Lexer::recordComments() const +{ + return m_recordComments; +} + +inline void Lexer::setRecordComments( bool record ) +{ + m_recordComments = record; +} + +inline bool Lexer::recordWhiteSpaces() const +{ + return m_recordWhiteSpaces; +} + +inline void Lexer::setRecordWhiteSpaces( bool record ) +{ + m_recordWhiteSpaces = record; +} + +inline QString Lexer::source() const +{ + return m_source; +} + +inline int Lexer::index() const +{ + return m_index; +} + +inline void Lexer::setIndex( int index ) +{ + m_index = index; +} + +inline const Token& Lexer::nextToken() +{ + if( m_index < m_size ) + return *m_tokens[ m_index++ ]; + + return *m_tokens[ m_index ]; +} + +inline const Token& Lexer::tokenAt( int n ) const +{ + return *m_tokens[ QMIN(n, m_size-1) ]; +} + +inline const Token& Lexer::lookAhead( int n ) const +{ + return *m_tokens[ QMIN(m_index + n, m_size-1) ]; +} + +inline int Lexer::tokenPosition( const Token& token ) const +{ + return token.position(); +} + +inline void Lexer::nextChar() +{ + if(*m_ptr == '\n') { + ++m_currentLine; + m_currentColumn = 0; + m_startLine = true; + } else { + ++m_currentColumn; + } + ++m_ptr; + + if( m_ptr < m_endPtr ) + m_currentChar = *m_ptr; + else + m_currentChar = QChar::null; +} + +inline void Lexer::nextChar( int n ) +{ + m_currentColumn += n; + m_ptr += n; + + if( m_ptr < m_endPtr ) + m_currentChar = *m_ptr; + else + m_currentChar = QChar::null; +} + +inline void Lexer::readIdentifier() +{ + while( currentChar().isLetterOrNumber() || currentChar() == '_' ) + nextChar(); +} + +inline void Lexer::readWhiteSpaces( bool skipNewLine, bool skipOnlyOnce ) +{ + while( !currentChar().isNull() ){ + QChar ch = currentChar(); + + if( ch == '\n' && !skipNewLine ){ + break; + } else if( ch.isSpace() ){ + nextChar(); + } else if( m_inPreproc && currentChar() == '\\' ){ + nextChar(); + readWhiteSpaces( true, true ); + } else { + break; + } + if( skipOnlyOnce && ch == '\n' ) { + skipNewLine = false; + } + } +} + +//little hack for better performance +inline bool isTodo( const QString& txt, int position ) { + if( txt.length() < position + 4 ) return false; + return (txt[ position ] == 't' || txt[ position ] == 'T') + && (txt[ position+1 ] == 'o' || txt[ position+1 ] == 'O') + && (txt[ position+2 ] == 'd' || txt[ position+2 ] == 'D') + && (txt[ position+3 ] == 'o' || txt[ position+3 ] == 'O'); +} + +inline bool isFixme( const QString& txt, int position ) { + if( txt.length() < position + 5 ) return false; + return (txt[ position ] == 'f' || txt[ position ] == 'F') + && (txt[ position+1 ] == 'i' || txt[ position+1 ] == 'I') + && (txt[ position+2 ] == 'x' || txt[ position+2 ] == 'X') + && (txt[ position+3 ] == 'm' || txt[ position+3 ] == 'M') + && (txt[ position+4 ] == 'e' || txt[ position+4 ] == 'E'); +} + +inline void Lexer::readLineComment() +{ + while( !currentChar().isNull() && currentChar() != '\n' ){ + if( m_reportMessages && isTodo( m_source, currentPosition() ) ){ + nextChar( 4 ); + QString msg; + int line = m_currentLine; + int col = m_currentColumn; + + while( currentChar() ){ + if( currentChar() == '*' && peekChar() == '/' ) + break; + else if( currentChar() == '\n' ) + break; + + msg += currentChar(); + nextChar(); + } + m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Todo) ); + } else + if( m_reportMessages && isFixme( m_source, currentPosition() ) ){ + nextChar( 5 ); + QString msg; + int line = m_currentLine; + int col = m_currentColumn; + + while( currentChar() ){ + if( currentChar() == '*' && peekChar() == '/' ) + break; + else if( currentChar() == '\n' ) + break; + + msg += currentChar(); + nextChar(); + } + m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Fixme) ); + } else + nextChar(); + } +} + +inline void Lexer::readMultiLineComment() +{ + while( !currentChar().isNull() ){ + if( currentChar() == '*' && peekChar() == '/' ){ + nextChar( 2 ); + return; + } else if( m_reportMessages && isTodo( m_source, currentPosition() ) ){ + nextChar( 4 ); + QString msg; + int line = m_currentLine; + int col = m_currentColumn; + + while( currentChar() ){ + if( currentChar() == '*' && peekChar() == '/' ) + break; + else if( currentChar() == '\n' ) + break; + msg += currentChar(); + nextChar(); + } + m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Todo) ); + } else + if( m_reportMessages && isFixme( m_source, currentPosition() ) ) { + nextChar( 5 ); + QString msg; + int line = m_currentLine; + int col = m_currentColumn; + + while( currentChar() ){ + if( currentChar() == '*' && peekChar() == '/' ) + break; + else if( currentChar() == '\n' ) + break; + + msg += currentChar(); + nextChar(); + } + m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Fixme) ); + } else + nextChar(); + } +} + +inline void Lexer::readCharLiteral() +{ + if( currentChar() == '\'' ) + nextChar(); // skip ' + else if( currentChar() == 'L' && peekChar() == '\'' ) + nextChar( 2 ); // slip L' + else + return; + + while( !currentChar().isNull() ){ + int len = getOffset( m_endPtr ) - currentPosition(); + + if( len>=2 && (currentChar() == '\\' && peekChar() == '\'') ){ + nextChar( 2 ); + } else if( len>=2 && (currentChar() == '\\' && peekChar() == '\\') ){ + nextChar( 2 ); + } else if( currentChar() == '\'' ){ + nextChar(); + break; + } else { + nextChar(); + } + } +} + +inline void Lexer::readStringLiteral() +{ + if( currentChar() != '"' ) + return; + + nextChar(); // skip " + + while( !currentChar().isNull() ){ + int len = getOffset( m_endPtr ) - currentPosition(); + + if( len>=2 && currentChar() == '\\' && peekChar() == '"' ){ + nextChar( 2 ); + } else if( len>=2 && currentChar() == '\\' && peekChar() == '\\' ){ + nextChar( 2 ); + } else if( currentChar() == '"' ){ + nextChar(); + break; + } else { + nextChar(); + } + } +} + +inline void Lexer::readNumberLiteral() +{ + while( currentChar().isLetterOrNumber() || currentChar() == '.' ) + nextChar(); +} + +inline int Lexer::findOperator3() const +{ + int n = getOffset( m_endPtr ) - currentPosition(); + + if( n >= 3){ + QChar ch = currentChar(), ch1=peekChar(), ch2=peekChar(2); + + if( ch == '<' && ch1 == '<' && ch2 == '=' ) return Token_assign; + else if( ch == '>' && ch1 == '>' && ch2 == '=' ) return Token_assign; + else if( ch == '-' && ch1 == '>' && ch2 == '*' ) return Token_ptrmem; + else if( ch == '.' && ch1 == '.' && ch2 == '.' ) return Token_ellipsis; + } + + return -1; +} + +inline int Lexer::findOperator2() const +{ + int n = getOffset( m_endPtr ) - currentPosition(); + + if( n>=2 ){ + QChar ch = currentChar(), ch1=peekChar(); + + if( ch == ':' && ch1 == ':' ) return Token_scope; + else if( ch == '.' && ch1 == '*' ) return Token_ptrmem; + else if( ch == '+' && ch1 == '=' ) return Token_assign; + else if( ch == '-' && ch1 == '=' ) return Token_assign; + else if( ch == '*' && ch1 == '=' ) return Token_assign; + else if( ch == '/' && ch1 == '=' ) return Token_assign; + else if( ch == '%' && ch1 == '=' ) return Token_assign; + else if( ch == '^' && ch1 == '=' ) return Token_assign; + else if( ch == '&' && ch1 == '=' ) return Token_assign; + else if( ch == '|' && ch1 == '=' ) return Token_assign; + else if( ch == '<' && ch1 == '<' ) return Token_shift; + else if( ch == '>' && ch1 == '>' ) return Token_shift; + else if( ch == '=' && ch1 == '=' ) return Token_eq; + else if( ch == '!' && ch1 == '=' ) return Token_eq; + else if( ch == '<' && ch1 == '=' ) return Token_leq; + else if( ch == '>' && ch1 == '=' ) return Token_geq; + else if( ch == '&' && ch1 == '&' ) return Token_and; + else if( ch == '|' && ch1 == '|' ) return Token_or; + else if( ch == '+' && ch1 == '+' ) return Token_incr; + else if( ch == '-' && ch1 == '-' ) return Token_decr; + else if( ch == '-' && ch1 == '>' ) return Token_arrow; + else if( ch == '#' && ch1 == '#' ) return Token_concat; + } + + return -1; +} + +inline bool Lexer::skipWordsEnabled() const +{ + return m_skipWordsEnabled; +} + +inline void Lexer::setSkipWordsEnabled( bool enabled ) +{ + m_skipWordsEnabled = enabled; +} + +inline bool Lexer::preprocessorEnabled() const +{ + return m_preprocessorEnabled; +} + +inline void Lexer::setPreprocessorEnabled( bool enabled ) +{ + m_preprocessorEnabled = enabled; +} + +inline int Lexer::currentPosition() const +{ + return getOffset( m_ptr ); +} + +inline const QChar Lexer::currentChar() const +{ + return m_currentChar; +} + +inline QChar Lexer::peekChar( int n ) const +{ + const QChar* p = m_ptr + n; + + if( p < m_endPtr ) + return *p; + else + return QChar::null; +} + +inline bool Lexer::eof() const +{ + return m_ptr >= m_endPtr; +} + +inline bool Lexer::reportWarnings() const +{ + return m_reportWarnings; +} + +inline void Lexer::setReportWarnings( bool enable ) +{ + m_reportWarnings = enable; +} + +inline bool Lexer::reportMessages() const +{ + return m_reportMessages; +} + +inline void Lexer::setReportMessages( bool enable ) +{ + m_reportMessages = enable; +} + +inline void Lexer::insertCurrent( const QString& str ) { + int posi = currentPosition(); + m_source.insert( posi, str ); + + m_ptr = offset( posi ); + m_endPtr = offset( m_source.length() ); + if( m_ptr < m_endPtr ) + m_currentChar = *m_ptr; + else + m_currentChar = QChar::null; +} + +#endif |