diff options
author | toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2009-11-25 17:56:58 +0000 |
---|---|---|
committer | toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2009-11-25 17:56:58 +0000 |
commit | bd9e6617827818fd043452c08c606f07b78014a0 (patch) | |
tree | 425bb4c3168f9c02f10150f235d2cb998dcc6108 /poxml | |
download | tdesdk-bd9e6617827818fd043452c08c606f07b78014a0.tar.gz tdesdk-bd9e6617827818fd043452c08c606f07b78014a0.zip |
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features.
BUG:215923
git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdesdk@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'poxml')
100 files changed, 11596 insertions, 0 deletions
diff --git a/poxml/GettextLexer.cpp b/poxml/GettextLexer.cpp new file mode 100644 index 00000000..cc768114 --- /dev/null +++ b/poxml/GettextLexer.cpp @@ -0,0 +1,550 @@ +/* $ANTLR 2.7.1: "gettext.g" -> "GettextLexer.cpp"$ */ +#include "GettextLexer.hpp" +#include "antlr/CharBuffer.hpp" +#include "antlr/TokenStreamException.hpp" +#include "antlr/TokenStreamIOException.hpp" +#include "antlr/TokenStreamRecognitionException.hpp" +#include "antlr/CharStreamException.hpp" +#include "antlr/CharStreamIOException.hpp" +#include "antlr/NoViableAltForCharException.hpp" + +#line 1 "gettext.g" + +#line 14 "GettextLexer.cpp" +GettextLexer::GettextLexer(ANTLR_USE_NAMESPACE(std)istream& in) + : ANTLR_USE_NAMESPACE(antlr)CharScanner(new ANTLR_USE_NAMESPACE(antlr)CharBuffer(in)) +{ + setCaseSensitive(true); + initLiterals(); +} + +GettextLexer::GettextLexer(ANTLR_USE_NAMESPACE(antlr)InputBuffer& ib) + : ANTLR_USE_NAMESPACE(antlr)CharScanner(ib) +{ + setCaseSensitive(true); + initLiterals(); +} + +GettextLexer::GettextLexer(const ANTLR_USE_NAMESPACE(antlr)LexerSharedInputState& state) + : ANTLR_USE_NAMESPACE(antlr)CharScanner(state) +{ + setCaseSensitive(true); + initLiterals(); +} + +void GettextLexer::initLiterals() +{ +} +bool GettextLexer::getCaseSensitiveLiterals() const +{ + return true; +} + +ANTLR_USE_NAMESPACE(antlr)RefToken GettextLexer::nextToken() +{ + ANTLR_USE_NAMESPACE(antlr)RefToken theRetToken; + for (;;) { + ANTLR_USE_NAMESPACE(antlr)RefToken theRetToken; + int _ttype = ANTLR_USE_NAMESPACE(antlr)Token::INVALID_TYPE; + resetText(); + try { // for char stream error handling + try { // for lexical error handling + switch ( LA(1)) { + case static_cast<unsigned char>('\t'): + case static_cast<unsigned char>('\n'): + case static_cast<unsigned char>('\r'): + case static_cast<unsigned char>(' '): + { + mWS(true); + theRetToken=_returnToken; + break; + } + case static_cast<unsigned char>('['): + { + mL_BRACKET(true); + theRetToken=_returnToken; + break; + } + case 
static_cast<unsigned char>(']'): + { + mR_BRACKET(true); + theRetToken=_returnToken; + break; + } + case static_cast<unsigned char>('0'): + case static_cast<unsigned char>('1'): + case static_cast<unsigned char>('2'): + case static_cast<unsigned char>('3'): + case static_cast<unsigned char>('4'): + case static_cast<unsigned char>('5'): + case static_cast<unsigned char>('6'): + case static_cast<unsigned char>('7'): + case static_cast<unsigned char>('8'): + case static_cast<unsigned char>('9'): + { + mT_INT(true); + theRetToken=_returnToken; + break; + } + case static_cast<unsigned char>('#'): + { + mT_COMMENT(true); + theRetToken=_returnToken; + break; + } + case static_cast<unsigned char>('m'): + { + mMSG_TAG(true); + theRetToken=_returnToken; + break; + } + case static_cast<unsigned char>('"'): + { + mT_STRING(true); + theRetToken=_returnToken; + break; + } + default: + { + if (LA(1)==EOF_CHAR) {uponEOF(); _returnToken = makeToken(ANTLR_USE_NAMESPACE(antlr)Token::EOF_TYPE);} + else {throw ANTLR_USE_NAMESPACE(antlr)NoViableAltForCharException(LA(1), getFilename(), getLine());} + } + } + if ( !_returnToken ) goto tryAgain; // found SKIP token + _ttype = _returnToken->getType(); + _returnToken->setType(_ttype); + return _returnToken; + } + catch (ANTLR_USE_NAMESPACE(antlr)RecognitionException& e) { + throw ANTLR_USE_NAMESPACE(antlr)TokenStreamRecognitionException(e); + } + } + catch (ANTLR_USE_NAMESPACE(antlr)CharStreamIOException& csie) { + throw ANTLR_USE_NAMESPACE(antlr)TokenStreamIOException(csie.io); + } + catch (ANTLR_USE_NAMESPACE(antlr)CharStreamException& cse) { + throw ANTLR_USE_NAMESPACE(antlr)TokenStreamException(cse.getMessage()); + } +tryAgain:; + } +} + +void GettextLexer::mWS(bool _createToken) { + int _ttype; ANTLR_USE_NAMESPACE(antlr)RefToken _token; int _begin=text.length(); + _ttype = WS; + int _saveIndex; + + { + switch ( LA(1)) { + case static_cast<unsigned char>(' '): + { + match(static_cast<unsigned char>(' ')); + break; + } + case 
static_cast<unsigned char>('\t'): + { + match(static_cast<unsigned char>('\t')); + break; + } + case static_cast<unsigned char>('\n'): + case static_cast<unsigned char>('\r'): + { + { + switch ( LA(1)) { + case static_cast<unsigned char>('\n'): + { + match(static_cast<unsigned char>('\n')); + break; + } + case static_cast<unsigned char>('\r'): + { + match("\r\n"); + break; + } + default: + { + throw ANTLR_USE_NAMESPACE(antlr)NoViableAltForCharException(LA(1), getFilename(), getLine()); + } + } + } +#line 110 "gettext.g" + newline(); +#line 173 "GettextLexer.cpp" + break; + } + default: + { + throw ANTLR_USE_NAMESPACE(antlr)NoViableAltForCharException(LA(1), getFilename(), getLine()); + } + } + } +#line 111 "gettext.g" + _ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP; +#line 184 "GettextLexer.cpp" + if ( _createToken && _token==ANTLR_USE_NAMESPACE(antlr)nullToken && _ttype!=ANTLR_USE_NAMESPACE(antlr)Token::SKIP ) { + _token = makeToken(_ttype); + _token->setText(text.substr(_begin, text.length()-_begin)); + } + _returnToken = _token; + _saveIndex=0; +} + +void GettextLexer::mL_BRACKET(bool _createToken) { + int _ttype; ANTLR_USE_NAMESPACE(antlr)RefToken _token; int _begin=text.length(); + _ttype = L_BRACKET; + int _saveIndex; + + match(static_cast<unsigned char>('[')); + if ( _createToken && _token==ANTLR_USE_NAMESPACE(antlr)nullToken && _ttype!=ANTLR_USE_NAMESPACE(antlr)Token::SKIP ) { + _token = makeToken(_ttype); + _token->setText(text.substr(_begin, text.length()-_begin)); + } + _returnToken = _token; + _saveIndex=0; +} + +void GettextLexer::mR_BRACKET(bool _createToken) { + int _ttype; ANTLR_USE_NAMESPACE(antlr)RefToken _token; int _begin=text.length(); + _ttype = R_BRACKET; + int _saveIndex; + + match(static_cast<unsigned char>(']')); + if ( _createToken && _token==ANTLR_USE_NAMESPACE(antlr)nullToken && _ttype!=ANTLR_USE_NAMESPACE(antlr)Token::SKIP ) { + _token = makeToken(_ttype); + _token->setText(text.substr(_begin, text.length()-_begin)); + } + 
_returnToken = _token; + _saveIndex=0; +} + +void GettextLexer::mT_INT(bool _createToken) { + int _ttype; ANTLR_USE_NAMESPACE(antlr)RefToken _token; int _begin=text.length(); + _ttype = T_INT; + int _saveIndex; + + { + int _cnt26=0; + for (;;) { + if (((LA(1) >= static_cast<unsigned char>('0') && LA(1) <= static_cast<unsigned char>('9')))) { + matchRange(static_cast<unsigned char>('0'),static_cast<unsigned char>('9')); + } + else { + if ( _cnt26>=1 ) { goto _loop26; } else {throw ANTLR_USE_NAMESPACE(antlr)NoViableAltForCharException(LA(1), getFilename(), getLine());} + } + + _cnt26++; + } + _loop26:; + } + if ( _createToken && _token==ANTLR_USE_NAMESPACE(antlr)nullToken && _ttype!=ANTLR_USE_NAMESPACE(antlr)Token::SKIP ) { + _token = makeToken(_ttype); + _token->setText(text.substr(_begin, text.length()-_begin)); + } + _returnToken = _token; + _saveIndex=0; +} + +void GettextLexer::mT_COMMENT(bool _createToken) { + int _ttype; ANTLR_USE_NAMESPACE(antlr)RefToken _token; int _begin=text.length(); + _ttype = T_COMMENT; + int _saveIndex; + + match(static_cast<unsigned char>('#')); + { + for (;;) { + if ((_tokenSet_0.member(LA(1)))) { + matchNot(static_cast<unsigned char>('\n')); + } + else { + goto _loop29; + } + + } + _loop29:; + } + if ( _createToken && _token==ANTLR_USE_NAMESPACE(antlr)nullToken && _ttype!=ANTLR_USE_NAMESPACE(antlr)Token::SKIP ) { + _token = makeToken(_ttype); + _token->setText(text.substr(_begin, text.length()-_begin)); + } + _returnToken = _token; + _saveIndex=0; +} + +void GettextLexer::mMSG_TAG(bool _createToken) { + int _ttype; ANTLR_USE_NAMESPACE(antlr)RefToken _token; int _begin=text.length(); + _ttype = MSG_TAG; + int _saveIndex; + + match("msg"); + { + switch ( LA(1)) { + case static_cast<unsigned char>('i'): + { + { + match("id"); + } + { + if ((LA(1)==static_cast<unsigned char>('_'))) { + match("_plural"); +#line 126 "gettext.g" + _ttype = T_MSGID_PLURAL; +#line 292 "GettextLexer.cpp" + } + else { + match(""); +#line 125 "gettext.g" + 
_ttype = T_MSGID; +#line 298 "GettextLexer.cpp" + } + + } + break; + } + case static_cast<unsigned char>('s'): + { + match("str"); +#line 128 "gettext.g" + _ttype = T_MSGSTR; +#line 309 "GettextLexer.cpp" + break; + } + default: + { + throw ANTLR_USE_NAMESPACE(antlr)NoViableAltForCharException(LA(1), getFilename(), getLine()); + } + } + } + if ( _createToken && _token==ANTLR_USE_NAMESPACE(antlr)nullToken && _ttype!=ANTLR_USE_NAMESPACE(antlr)Token::SKIP ) { + _token = makeToken(_ttype); + _token->setText(text.substr(_begin, text.length()-_begin)); + } + _returnToken = _token; + _saveIndex=0; +} + +void GettextLexer::mT_STRING(bool _createToken) { + int _ttype; ANTLR_USE_NAMESPACE(antlr)RefToken _token; int _begin=text.length(); + _ttype = T_STRING; + int _saveIndex; + + { + int _cnt43=0; + for (;;) { + if ((LA(1)==static_cast<unsigned char>('"'))) { + _saveIndex=text.length(); + match(static_cast<unsigned char>('"')); + text.erase(_saveIndex); + { + for (;;) { + if ((LA(1)==static_cast<unsigned char>('\\'))) { + mESC(false); + } + else if ((_tokenSet_1.member(LA(1)))) { + matchNot(static_cast<unsigned char>('"')); + } + else { + goto _loop37; + } + + } + _loop37:; + } + { + _saveIndex=text.length(); + match(static_cast<unsigned char>('"')); + text.erase(_saveIndex); + { + for (;;) { + switch ( LA(1)) { + case static_cast<unsigned char>(' '): + { + match(static_cast<unsigned char>(' ')); + break; + } + case static_cast<unsigned char>('t'): + { + match(static_cast<unsigned char>('t')); + break; + } + default: + { + goto _loop40; + } + } + } + _loop40:; + } + _saveIndex=text.length(); + match(static_cast<unsigned char>('\n')); + text.erase(_saveIndex); +#line 133 "gettext.g" + newline(); +#line 383 "GettextLexer.cpp" + { + for (;;) { + switch ( LA(1)) { + case static_cast<unsigned char>(' '): + { + _saveIndex=text.length(); + match(static_cast<unsigned char>(' ')); + text.erase(_saveIndex); + break; + } + case static_cast<unsigned char>('\t'): + { + 
_saveIndex=text.length(); + match(static_cast<unsigned char>('\t')); + text.erase(_saveIndex); + break; + } + default: + { + goto _loop42; + } + } + } + _loop42:; + } + } + } + else { + if ( _cnt43>=1 ) { goto _loop43; } else {throw ANTLR_USE_NAMESPACE(antlr)NoViableAltForCharException(LA(1), getFilename(), getLine());} + } + + _cnt43++; + } + _loop43:; + } + if ( _createToken && _token==ANTLR_USE_NAMESPACE(antlr)nullToken && _ttype!=ANTLR_USE_NAMESPACE(antlr)Token::SKIP ) { + _token = makeToken(_ttype); + _token->setText(text.substr(_begin, text.length()-_begin)); + } + _returnToken = _token; + _saveIndex=0; +} + +void GettextLexer::mESC(bool _createToken) { + int _ttype; ANTLR_USE_NAMESPACE(antlr)RefToken _token; int _begin=text.length(); + _ttype = ESC; + int _saveIndex; + + match(static_cast<unsigned char>('\\')); + { + switch ( LA(1)) { + case static_cast<unsigned char>('n'): + { + match(static_cast<unsigned char>('n')); + break; + } + case static_cast<unsigned char>('r'): + { + match(static_cast<unsigned char>('r')); + break; + } + case static_cast<unsigned char>('t'): + { + match(static_cast<unsigned char>('t')); + break; + } + case static_cast<unsigned char>('b'): + { + match(static_cast<unsigned char>('b')); + break; + } + case static_cast<unsigned char>('f'): + { + match(static_cast<unsigned char>('f')); + break; + } + case static_cast<unsigned char>('"'): + { + match(static_cast<unsigned char>('"')); + break; + } + case static_cast<unsigned char>('\''): + { + match(static_cast<unsigned char>('\'')); + break; + } + case static_cast<unsigned char>('\\'): + { + match(static_cast<unsigned char>('\\')); + break; + } + case static_cast<unsigned char>('0'): + case static_cast<unsigned char>('1'): + case static_cast<unsigned char>('2'): + case static_cast<unsigned char>('3'): + { + { + matchRange(static_cast<unsigned char>('0'),static_cast<unsigned char>('3')); + } + { + if (((LA(1) >= static_cast<unsigned char>('0') && LA(1) <= static_cast<unsigned 
char>('9')))) { + { + matchRange(static_cast<unsigned char>('0'),static_cast<unsigned char>('9')); + } + { + if (((LA(1) >= static_cast<unsigned char>('0') && LA(1) <= static_cast<unsigned char>('9')))) { + matchRange(static_cast<unsigned char>('0'),static_cast<unsigned char>('9')); + } + else if (((LA(1) >= static_cast<unsigned char>('\0') && LA(1) <= static_cast<unsigned char>('\377')))) { + } + else { + throw ANTLR_USE_NAMESPACE(antlr)NoViableAltForCharException(LA(1), getFilename(), getLine()); + } + + } + } + else if (((LA(1) >= static_cast<unsigned char>('\0') && LA(1) <= static_cast<unsigned char>('\377')))) { + } + else { + throw ANTLR_USE_NAMESPACE(antlr)NoViableAltForCharException(LA(1), getFilename(), getLine()); + } + + } + break; + } + case static_cast<unsigned char>('4'): + case static_cast<unsigned char>('5'): + case static_cast<unsigned char>('6'): + case static_cast<unsigned char>('7'): + { + { + matchRange(static_cast<unsigned char>('4'),static_cast<unsigned char>('7')); + } + { + if (((LA(1) >= static_cast<unsigned char>('0') && LA(1) <= static_cast<unsigned char>('9')))) { + { + matchRange(static_cast<unsigned char>('0'),static_cast<unsigned char>('9')); + } + } + else if (((LA(1) >= static_cast<unsigned char>('\0') && LA(1) <= static_cast<unsigned char>('\377')))) { + } + else { + throw ANTLR_USE_NAMESPACE(antlr)NoViableAltForCharException(LA(1), getFilename(), getLine()); + } + + } + break; + } + default: + { + throw ANTLR_USE_NAMESPACE(antlr)NoViableAltForCharException(LA(1), getFilename(), getLine()); + } + } + } + if ( _createToken && _token==ANTLR_USE_NAMESPACE(antlr)nullToken && _ttype!=ANTLR_USE_NAMESPACE(antlr)Token::SKIP ) { + _token = makeToken(_ttype); + _token->setText(text.substr(_begin, text.length()-_begin)); + } + _returnToken = _token; + _saveIndex=0; +} + + +const unsigned long GettextLexer::_tokenSet_0_data_[] = { 4294966271UL, 4294967295UL, 4294967295UL, 4294967295UL, 4294967295UL, 4294967295UL, 4294967295UL, 4294967295UL, 
0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL }; +const ANTLR_USE_NAMESPACE(antlr)BitSet GettextLexer::_tokenSet_0(_tokenSet_0_data_,16); +const unsigned long GettextLexer::_tokenSet_1_data_[] = { 4294967295UL, 4294967291UL, 4026531839UL, 4294967295UL, 4294967295UL, 4294967295UL, 4294967295UL, 4294967295UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL }; +const ANTLR_USE_NAMESPACE(antlr)BitSet GettextLexer::_tokenSet_1(_tokenSet_1_data_,16); + diff --git a/poxml/GettextLexer.hpp b/poxml/GettextLexer.hpp new file mode 100644 index 00000000..951ad423 --- /dev/null +++ b/poxml/GettextLexer.hpp @@ -0,0 +1,47 @@ +#ifndef INC_GettextLexer_hpp_ +#define INC_GettextLexer_hpp_ + +#line 2 "gettext.g" + +#include <string> +using namespace std; +#include "parser.h" + +#line 11 "GettextLexer.hpp" +#include "antlr/config.hpp" +/* $ANTLR 2.7.1: "gettext.g" -> "GettextLexer.hpp"$ */ +#include "antlr/CommonToken.hpp" +#include "antlr/InputBuffer.hpp" +#include "antlr/BitSet.hpp" +#include "GettextParserTokenTypes.hpp" +#include "antlr/CharScanner.hpp" +class GettextLexer : public ANTLR_USE_NAMESPACE(antlr)CharScanner, public GettextParserTokenTypes + { +#line 1 "gettext.g" +#line 22 "GettextLexer.hpp" +private: + void initLiterals(); +public: + bool getCaseSensitiveLiterals() const; +public: + GettextLexer(ANTLR_USE_NAMESPACE(std)istream& in); + GettextLexer(ANTLR_USE_NAMESPACE(antlr)InputBuffer& ib); + GettextLexer(const ANTLR_USE_NAMESPACE(antlr)LexerSharedInputState& state); + ANTLR_USE_NAMESPACE(antlr)RefToken nextToken(); + public: void mWS(bool _createToken); + public: void mL_BRACKET(bool _createToken); + public: void mR_BRACKET(bool _createToken); + public: void mT_INT(bool _createToken); + public: void mT_COMMENT(bool _createToken); + public: void mMSG_TAG(bool _createToken); + public: void mT_STRING(bool _createToken); + protected: void mESC(bool _createToken); +private: + + static const unsigned long _tokenSet_0_data_[]; + static const ANTLR_USE_NAMESPACE(antlr)BitSet _tokenSet_0; + 
static const unsigned long _tokenSet_1_data_[]; + static const ANTLR_USE_NAMESPACE(antlr)BitSet _tokenSet_1; +}; + +#endif /*INC_GettextLexer_hpp_*/ diff --git a/poxml/GettextParser.cpp b/poxml/GettextParser.cpp new file mode 100644 index 00000000..90651eaa --- /dev/null +++ b/poxml/GettextParser.cpp @@ -0,0 +1,414 @@ +/* $ANTLR 2.7.1: "gettext.g" -> "GettextParser.cpp"$ */ +#include "GettextParser.hpp" +#include "antlr/NoViableAltException.hpp" +#include "antlr/SemanticException.hpp" +#line 12 "gettext.g" + +#include <iostream> +#include "GettextLexer.hpp" +#include "GettextParser.hpp" +#include "antlr/AST.hpp" +#include "antlr/CommonAST.hpp" + +/* +int main() +{ + ANTLR_USING_NAMESPACE(std) + ANTLR_USING_NAMESPACE(antlr) + try { + GettextLexer lexer(cin); + GettextParser parser(lexer); + parser.file(); + + } catch(exception& e) { + cerr << "exception: " << e.what() << endl; + } +} +*/ + +#line 30 "GettextParser.cpp" +GettextParser::GettextParser(ANTLR_USE_NAMESPACE(antlr)TokenBuffer& tokenBuf, int k) +: ANTLR_USE_NAMESPACE(antlr)LLkParser(tokenBuf,k) +{ + setTokenNames(_tokenNames); +} + +GettextParser::GettextParser(ANTLR_USE_NAMESPACE(antlr)TokenBuffer& tokenBuf) +: ANTLR_USE_NAMESPACE(antlr)LLkParser(tokenBuf,1) +{ + setTokenNames(_tokenNames); +} + +GettextParser::GettextParser(ANTLR_USE_NAMESPACE(antlr)TokenStream& lexer, int k) +: ANTLR_USE_NAMESPACE(antlr)LLkParser(lexer,k) +{ + setTokenNames(_tokenNames); +} + +GettextParser::GettextParser(ANTLR_USE_NAMESPACE(antlr)TokenStream& lexer) +: ANTLR_USE_NAMESPACE(antlr)LLkParser(lexer,1) +{ + setTokenNames(_tokenNames); +} + +GettextParser::GettextParser(const ANTLR_USE_NAMESPACE(antlr)ParserSharedInputState& state) +: ANTLR_USE_NAMESPACE(antlr)LLkParser(state,1) +{ + setTokenNames(_tokenNames); +} + + MsgList GettextParser::file() { +#line 43 "gettext.g" + MsgList ml ; +#line 64 "GettextParser.cpp" +#line 43 "gettext.g" + + string c, mi, ms; + MsgBlock mb; + MsgList ml2; + +#line 71 "GettextParser.cpp" + + try 
{ // for error handling + bool synPredMatched3 = false; + if (((LA(1)==T_MSGID||LA(1)==T_COMMENT))) { + int _m3 = mark(); + synPredMatched3 = true; + inputState->guessing++; + try { + { + comment(); + match(T_MSGID); + } + } + catch (ANTLR_USE_NAMESPACE(antlr)RecognitionException& pe) { + synPredMatched3 = false; + } + rewind(_m3); + inputState->guessing--; + } + if ( synPredMatched3 ) { + { + mb=file_block(); + ml2=file(); + if ( inputState->guessing==0 ) { +#line 49 "gettext.g" + ml = ml2; ml.append(mb); +#line 98 "GettextParser.cpp" + } + } + } + else { + bool synPredMatched6 = false; + if (((LA(1)==ANTLR_USE_NAMESPACE(antlr)Token::EOF_TYPE||LA(1)==T_COMMENT))) { + int _m6 = mark(); + synPredMatched6 = true; + inputState->guessing++; + try { + { + comment(); + match(ANTLR_USE_NAMESPACE(antlr)Token::EOF_TYPE); + } + } + catch (ANTLR_USE_NAMESPACE(antlr)RecognitionException& pe) { + synPredMatched6 = false; + } + rewind(_m6); + inputState->guessing--; + } + if ( synPredMatched6 ) { + c=comment(); + if ( inputState->guessing==0 ) { +#line 50 "gettext.g" + (void)c; +#line 125 "GettextParser.cpp" + } + } + else { + throw ANTLR_USE_NAMESPACE(antlr)NoViableAltException(LT(1), getFilename()); + } + } + } + catch (ANTLR_USE_NAMESPACE(antlr)RecognitionException& ex) { + if( inputState->guessing == 0 ) { + reportError(ex); + consume(); + consumeUntil(_tokenSet_0); + } else { + throw ex; + } + } + return ml ; +} + +string GettextParser::comment() { +#line 76 "gettext.g" + string s; +#line 148 "GettextParser.cpp" + ANTLR_USE_NAMESPACE(antlr)RefToken c = ANTLR_USE_NAMESPACE(antlr)nullToken; +#line 76 "gettext.g" + + string r; + +#line 154 "GettextParser.cpp" + + try { // for error handling + if ((LA(1)==T_COMMENT)) { + { + c = LT(1); + match(T_COMMENT); + r=comment(); + if ( inputState->guessing==0 ) { +#line 80 "gettext.g" + s = c->getText() + r; +#line 165 "GettextParser.cpp" + } + } + } + else if ((LA(1)==ANTLR_USE_NAMESPACE(antlr)Token::EOF_TYPE||LA(1)==T_MSGID)) { + } + 
else { + throw ANTLR_USE_NAMESPACE(antlr)NoViableAltException(LT(1), getFilename()); + } + + } + catch (ANTLR_USE_NAMESPACE(antlr)RecognitionException& ex) { + if( inputState->guessing == 0 ) { + reportError(ex); + consume(); + consumeUntil(_tokenSet_1); + } else { + throw ex; + } + } + return s; +} + + MsgBlock GettextParser::file_block() { +#line 53 "gettext.g" + MsgBlock mb ; +#line 191 "GettextParser.cpp" +#line 53 "gettext.g" + + string c, mi, mip, ms; + +#line 196 "GettextParser.cpp" + + try { // for error handling + c=comment(); + mi=msgid(); + { + if ((LA(1)==T_MSGSTR)) { + { + ms=msgstr(); + if ( inputState->guessing==0 ) { +#line 59 "gettext.g" + + mb.comment = QString::fromUtf8(c.c_str()); + mb.msgid = QString::fromUtf8(mi.c_str()); + mb.msgstr = QString::fromUtf8(ms.c_str()); + +#line 212 "GettextParser.cpp" + } + } + } + else if ((LA(1)==T_MSGID_PLURAL)) { + { + mip=msgid_plural(); + ms=msgstr_plural(); + if ( inputState->guessing==0 ) { +#line 66 "gettext.g" + + mb.comment = QString::fromUtf8(c.c_str()); + mb.msgid = QString::fromUtf8(mi.c_str()); + mb.msgid_plural = QString::fromUtf8(mip.c_str()); + mb.msgstr = QString::fromUtf8(ms.c_str()); + +#line 228 "GettextParser.cpp" + } + } + } + else { + throw ANTLR_USE_NAMESPACE(antlr)NoViableAltException(LT(1), getFilename()); + } + + } + } + catch (ANTLR_USE_NAMESPACE(antlr)RecognitionException& ex) { + if( inputState->guessing == 0 ) { + reportError(ex); + consume(); + consumeUntil(_tokenSet_2); + } else { + throw ex; + } + } + return mb ; +} + +string GettextParser::msgid() { +#line 84 "gettext.g" + string s; +#line 253 "GettextParser.cpp" + ANTLR_USE_NAMESPACE(antlr)RefToken t = ANTLR_USE_NAMESPACE(antlr)nullToken; + + try { // for error handling + match(T_MSGID); + t = LT(1); + match(T_STRING); + if ( inputState->guessing==0 ) { +#line 85 "gettext.g" + s = t->getText(); +#line 263 "GettextParser.cpp" + } + } + catch (ANTLR_USE_NAMESPACE(antlr)RecognitionException& ex) { + if( inputState->guessing == 0 
) { + reportError(ex); + consume(); + consumeUntil(_tokenSet_3); + } else { + throw ex; + } + } + return s; +} + +string GettextParser::msgstr() { +#line 92 "gettext.g" + string s; +#line 281 "GettextParser.cpp" + ANTLR_USE_NAMESPACE(antlr)RefToken t = ANTLR_USE_NAMESPACE(antlr)nullToken; + + try { // for error handling + match(T_MSGSTR); + t = LT(1); + match(T_STRING); + if ( inputState->guessing==0 ) { +#line 93 "gettext.g" + s = t->getText(); +#line 291 "GettextParser.cpp" + } + } + catch (ANTLR_USE_NAMESPACE(antlr)RecognitionException& ex) { + if( inputState->guessing == 0 ) { + reportError(ex); + consume(); + consumeUntil(_tokenSet_2); + } else { + throw ex; + } + } + return s; +} + +string GettextParser::msgid_plural() { +#line 88 "gettext.g" + string s; +#line 309 "GettextParser.cpp" + ANTLR_USE_NAMESPACE(antlr)RefToken t = ANTLR_USE_NAMESPACE(antlr)nullToken; + + try { // for error handling + match(T_MSGID_PLURAL); + t = LT(1); + match(T_STRING); + if ( inputState->guessing==0 ) { +#line 89 "gettext.g" + s = t->getText(); +#line 319 "GettextParser.cpp" + } + } + catch (ANTLR_USE_NAMESPACE(antlr)RecognitionException& ex) { + if( inputState->guessing == 0 ) { + reportError(ex); + consume(); + consumeUntil(_tokenSet_4); + } else { + throw ex; + } + } + return s; +} + +string GettextParser::msgstr_plural() { +#line 96 "gettext.g" + string s; +#line 337 "GettextParser.cpp" + ANTLR_USE_NAMESPACE(antlr)RefToken n = ANTLR_USE_NAMESPACE(antlr)nullToken; + ANTLR_USE_NAMESPACE(antlr)RefToken t = ANTLR_USE_NAMESPACE(antlr)nullToken; + + try { // for error handling + { + int _cnt18=0; + for (;;) { + if ((LA(1)==T_MSGSTR)) { + match(T_MSGSTR); + match(L_BRACKET); + n = LT(1); + match(T_INT); + match(R_BRACKET); + t = LT(1); + match(T_STRING); + if ( inputState->guessing==0 ) { +#line 98 "gettext.g" + s = t->getText(); +#line 356 "GettextParser.cpp" + } + } + else { + if ( _cnt18>=1 ) { goto _loop18; } else {throw ANTLR_USE_NAMESPACE(antlr)NoViableAltException(LT(1), 
getFilename());} + } + + _cnt18++; + } + _loop18:; + } + } + catch (ANTLR_USE_NAMESPACE(antlr)RecognitionException& ex) { + if( inputState->guessing == 0 ) { + reportError(ex); + consume(); + consumeUntil(_tokenSet_2); + } else { + throw ex; + } + } + return s; +} + +const char* GettextParser::_tokenNames[] = { + "<0>", + "EOF", + "<2>", + "NULL_TREE_LOOKAHEAD", + "T_MSGID", + "T_COMMENT", + "T_STRING", + "T_MSGID_PLURAL", + "T_MSGSTR", + "L_BRACKET", + "T_INT", + "R_BRACKET", + "WS", + "MSG_TAG", + "ESC", + 0 +}; + +const unsigned long GettextParser::_tokenSet_0_data_[] = { 2UL, 0UL, 0UL, 0UL }; +// EOF +const ANTLR_USE_NAMESPACE(antlr)BitSet GettextParser::_tokenSet_0(_tokenSet_0_data_,4); +const unsigned long GettextParser::_tokenSet_1_data_[] = { 18UL, 0UL, 0UL, 0UL }; +// EOF T_MSGID +const ANTLR_USE_NAMESPACE(antlr)BitSet GettextParser::_tokenSet_1(_tokenSet_1_data_,4); +const unsigned long GettextParser::_tokenSet_2_data_[] = { 50UL, 0UL, 0UL, 0UL }; +// EOF T_MSGID T_COMMENT +const ANTLR_USE_NAMESPACE(antlr)BitSet GettextParser::_tokenSet_2(_tokenSet_2_data_,4); +const unsigned long GettextParser::_tokenSet_3_data_[] = { 384UL, 0UL, 0UL, 0UL }; +// T_MSGID_PLURAL T_MSGSTR +const ANTLR_USE_NAMESPACE(antlr)BitSet GettextParser::_tokenSet_3(_tokenSet_3_data_,4); +const unsigned long GettextParser::_tokenSet_4_data_[] = { 256UL, 0UL, 0UL, 0UL }; +// T_MSGSTR +const ANTLR_USE_NAMESPACE(antlr)BitSet GettextParser::_tokenSet_4(_tokenSet_4_data_,4); + + diff --git a/poxml/GettextParser.hpp b/poxml/GettextParser.hpp new file mode 100644 index 00000000..46b2b137 --- /dev/null +++ b/poxml/GettextParser.hpp @@ -0,0 +1,53 @@ +#ifndef INC_GettextParser_hpp_ +#define INC_GettextParser_hpp_ + +#line 2 "gettext.g" + +#include <string> +using namespace std; +#include "parser.h" + +#line 11 "GettextParser.hpp" +#include "antlr/config.hpp" +/* $ANTLR 2.7.1: "gettext.g" -> "GettextParser.hpp"$ */ +#include "antlr/TokenStream.hpp" +#include "antlr/TokenBuffer.hpp" +#include 
"GettextParserTokenTypes.hpp" +#include "antlr/LLkParser.hpp" + +class GettextParser : public ANTLR_USE_NAMESPACE(antlr)LLkParser, public GettextParserTokenTypes + { +#line 1 "gettext.g" +#line 22 "GettextParser.hpp" +protected: + GettextParser(ANTLR_USE_NAMESPACE(antlr)TokenBuffer& tokenBuf, int k); +public: + GettextParser(ANTLR_USE_NAMESPACE(antlr)TokenBuffer& tokenBuf); +protected: + GettextParser(ANTLR_USE_NAMESPACE(antlr)TokenStream& lexer, int k); +public: + GettextParser(ANTLR_USE_NAMESPACE(antlr)TokenStream& lexer); + GettextParser(const ANTLR_USE_NAMESPACE(antlr)ParserSharedInputState& state); + public: MsgList file(); + public: string comment(); + public: MsgBlock file_block(); + public: string msgid(); + public: string msgstr(); + public: string msgid_plural(); + public: string msgstr_plural(); +private: + static const char* _tokenNames[]; + + static const unsigned long _tokenSet_0_data_[]; + static const ANTLR_USE_NAMESPACE(antlr)BitSet _tokenSet_0; + static const unsigned long _tokenSet_1_data_[]; + static const ANTLR_USE_NAMESPACE(antlr)BitSet _tokenSet_1; + static const unsigned long _tokenSet_2_data_[]; + static const ANTLR_USE_NAMESPACE(antlr)BitSet _tokenSet_2; + static const unsigned long _tokenSet_3_data_[]; + static const ANTLR_USE_NAMESPACE(antlr)BitSet _tokenSet_3; + static const unsigned long _tokenSet_4_data_[]; + static const ANTLR_USE_NAMESPACE(antlr)BitSet _tokenSet_4; +}; + +#endif /*INC_GettextParser_hpp_*/ diff --git a/poxml/GettextParserTokenTypes.hpp b/poxml/GettextParserTokenTypes.hpp new file mode 100644 index 00000000..05fd7408 --- /dev/null +++ b/poxml/GettextParserTokenTypes.hpp @@ -0,0 +1,22 @@ +#ifndef INC_GettextParserTokenTypes_hpp_ +#define INC_GettextParserTokenTypes_hpp_ + +/* $ANTLR 2.7.1: "gettext.g" -> "GettextParserTokenTypes.hpp"$ */ +struct GettextParserTokenTypes { + enum { + EOF_ = 1, + T_MSGID = 4, + T_COMMENT = 5, + T_STRING = 6, + T_MSGID_PLURAL = 7, + T_MSGSTR = 8, + L_BRACKET = 9, + T_INT = 10, + R_BRACKET 
= 11, + WS = 12, + MSG_TAG = 13, + ESC = 14, + NULL_TREE_LOOKAHEAD = 3 + }; +}; +#endif /*INC_GettextParserTokenTypes_hpp_*/ diff --git a/poxml/GettextParserTokenTypes.txt b/poxml/GettextParserTokenTypes.txt new file mode 100644 index 00000000..083e90e3 --- /dev/null +++ b/poxml/GettextParserTokenTypes.txt @@ -0,0 +1,13 @@ +// $ANTLR 2.7.1: gettext.g -> GettextParserTokenTypes.txt$ +GettextParser // output token vocab name +T_MSGID=4 +T_COMMENT=5 +T_STRING=6 +T_MSGID_PLURAL=7 +T_MSGSTR=8 +L_BRACKET=9 +T_INT=10 +R_BRACKET=11 +WS=12 +MSG_TAG=13 +ESC=14 diff --git a/poxml/Makefile.am b/poxml/Makefile.am new file mode 100644 index 00000000..bef54179 --- /dev/null +++ b/poxml/Makefile.am @@ -0,0 +1,48 @@ + +bin_PROGRAMS = split2po xml2pot po2xml swappo transxx + +INCLUDES = -I$(srcdir)/antlr $(all_includes) +KDE_CXXFLAGS = $(USE_EXCEPTIONS) + +SUBDIRS = antlr + +split2po_SOURCES = split.cpp parser.cpp +split2po_LDFLAGS = $(all_libraries) $(KDE_RPATH) +split2po_LDADD = $(LIB_QT) + +xml2pot_SOURCES = xml2pot.cpp parser.cpp +xml2pot_LDFLAGS = $(all_libraries) $(KDE_RPATH) +xml2pot_LDADD = $(LIB_QT) + +po2xml_SOURCES = GettextLexer.cpp GettextParser.cpp po2xml.cpp parser.cpp +po2xml_LDFLAGS = $(all_libraries) $(KDE_RPATH) +po2xml_LDADD = antlr/src/libantlr.la $(LIB_QT) + +swappo_SOURCES = GettextLexer.cpp GettextParser.cpp swappo.cpp parser.cpp +swappo_LDFLAGS = $(all_libraries) $(KDE_RPATH) +swappo_LDADD = antlr/src/libantlr.la $(LIB_QT) + +transxx_SOURCES = GettextLexer.cpp GettextParser.cpp transxx.cpp parser.cpp +transxx_LDFLAGS = $(all_libraries) $(KDE_RPATH) +transxx_LDADD = antlr/src/libantlr.la $(LIB_QT) + +parser: + cd $(srcdir) && java antlr.Tool gettext.g + +SUFFIXES = .pot .po .xml .txml + +lauri.pot: xml2pot lauri.xml + checkXML $(srcdir)/lauri.xml + ./xml2pot $(srcdir)/lauri.xml > lauri.pot + msgmerge -o lauri.pot lauri.pot lauri.pot + +$(srcdir)/lauri.po: lauri.pot + msgmerge -o $(srcdir)/lauri.po $(srcdir)/lauri.po lauri.pot + msgfmt --statistics 
$(srcdir)/lauri.po -o /dev/null + +lauri_de.xml: po2xml $(srcdir)/lauri.po $(srcdir)/lauri.xml + ./po2xml $(srcdir)/lauri.xml $(srcdir)/lauri.po | \ + sed -e "s,<!ENTITY % English,<!ENTITY % German," > lauri_de.xml + +test: lauri_de.xml + checkXML lauri_de.xml diff --git a/poxml/antlr/AUTHORS b/poxml/antlr/AUTHORS new file mode 100644 index 00000000..7bdc4852 --- /dev/null +++ b/poxml/antlr/AUTHORS @@ -0,0 +1,2 @@ +Author: + Peter Wells <pete@yamuna.demon.co.uk> diff --git a/poxml/antlr/COPYING b/poxml/antlr/COPYING new file mode 100644 index 00000000..ce9ec595 --- /dev/null +++ b/poxml/antlr/COPYING @@ -0,0 +1,32 @@ + +SOFTWARE RIGHTS + +ANTLR MageLang Institute, 1989-1999 +http://www.ANTLR.org + +We reserve no legal rights to the ANTLR--it is fully in the +public domain. An individual or company may do whatever +they wish with source code distributed with ANTLR or the +code generated by ANTLR, including the incorporation of +ANTLR, or its output, into commerical software. + +We encourage users to develop software with ANTLR. However, +we do ask that credit is given to us for developing +ANTLR. By "credit", we mean that if you use ANTLR or +incorporate any source code into one of your programs +(commercial product, research project, or otherwise) that +you acknowledge this fact somewhere in the documentation, +research report, etc... If you like ANTLR and have +developed a nice tool with the output, please mention that +you developed it using ANTLR. In addition, we ask that the +headers remain intact in our source code. As long as these +guidelines are kept, we expect to continue enhancing this +system and expect to make other tools available as they are +completed. 
+ +The primary ANTLR guy: + +Terence Parr +MageLang Institute; http://www.MageLang.com +parrt@jguru.com +parrt@magelang.com diff --git a/poxml/antlr/ChangeLog b/poxml/antlr/ChangeLog new file mode 100644 index 00000000..735046e5 --- /dev/null +++ b/poxml/antlr/ChangeLog @@ -0,0 +1,293 @@ +Not 100% complete. Changes from develtree are not listed yet. + +Change 400 on 2000/09/27 by klaren@klaren.hawking.main + + Made little TCL script to pretty print a ChangeLog with C++ stuff. + + +Change 399 on 2000/09/27 by klaren@klaren.hawking.main + + Fixed generating too many ASTNULL checks in wrong places. + + +Change 397 on 2000/09/27 by klaren@klaren.hawking.main + + Some *UGLY* fixes for the last typecasting problems in Cpp codegen. It + now works. In 2.7.2 or later I'll fix this in a nice way. + + +Change 397 on 2000/09/27 by klaren@klaren.hawking.main + + Some *UGLY* fixes for the last typecasting problems in Cpp codegen. It + now works. In 2.7.2 or later I'll fix this in a nice way. + + +Change 394 on 2000/09/26 by klaren@klaren.hawking.main + + Prefixed Unicode optimization checks with a ASTNULL check. + + +Change 393 on 2000/09/25 by klaren@klaren.hawking.main + + Bumped up the version no from 2.7.1a4 to 2.7.1. + + +Change 380 on 2000/09/24 by parrt@parrt.foggy + + integrating ric's stuff into main + + +Change 380 on 2000/09/24 by parrt@parrt.foggy + + integrating ric's stuff into main + + +Change 380 on 2000/09/24 by parrt@parrt.foggy + + integrating ric's stuff into main + + +Change 348 on 2000/09/07 by klaren@klaren.hawking.main + + Small improvement in constructor of CommonAST. + + +Change 344 on 2000/09/06 by klaren@klaren.hawking.main + + Fixed missing namespace in generated TreeParsers as reported by Ross + Bencina. + + +Change 341 on 2000/09/06 by klaren@klaren.hawking.main + + Miniscule fix for Borland C++Builder 4.0/C++ 5.4. (extra parens) + + +Change 317 on 2000/08/22 by klaren@klaren.hawking.main + + Updated changelog for a5 (or was it 2.7.1) release.. 
+ + +Change 316 on 2000/08/22 by klaren@klaren.hawking.main + + All kinds of small Makefile/configure tweaks. All gcc-isms should be + gone now. + + +Change 309 on 2000/08/15 by klaren@klaren.hawking.main + + Integrate bugfixes from klaren.dev to MismatchedChar/TokenException. + + +Change 297 on 2000/08/07 by klaren@klaren.kronecker.main + + Fixes for namespace/namespaceAntlr/namespaceStd/genHashLines options. + + +Change 296 on 2000/08/07 by klaren@klaren.kronecker.main + + Virtualized all functions that someone should want to override. Probably + necessary for heteroAST stuff. + + +Change 291 on 2000/08/07 by klaren@klaren.kronecker.main + + Some tweaks to configure.in and Makefile.am's. Fix for CXXFLAGS being + set incorrectly when not using gcc. + + +Change 290 on 2000/08/05 by klaren@klaren.kronecker.main + + Updated prototype of toLower to definition in cpp file. It seems I + messed them up a while back. + + +Change 289 on 2000/08/05 by klaren@klaren.kronecker.main + + Added namespace macro to out_of_range exception. + + +Change 288 on 2000/07/28 by parrt@parrt.foggy + + re-added toLower return type fix + + +Change 285 on 2000/07/19 by klaren@klaren.kronecker.main + + Fixed thinko. + + +Change 284 on 2000/07/19 by klaren@klaren.kronecker.main + + Dumped output of p4 changes -l into it... + + +Change 283 on 2000/07/19 by klaren@klaren.kronecker.main + + Fix for bug found by Michael Ebner. Bitset size was not increased in add + method. + + +Change 280 on 2000/07/19 by klaren@klaren.kronecker.main + + Made namespaceAntlr, namespaceStd and genHashlines options file-level + options. Removed nameSpace member from Tool class all is now handled in + CppCodegenerator.java. 
+ + +Change 276 on 2000/07/18 by klaren@klaren.kronecker.main + + C++ Changes for the indented traceXXXX output as invented by Monty Zukowski + + +Change 275 on 2000/07/18 by klaren@klaren.kronecker.main + + Added missing initializer in generated code for TreeParser + + +Change 272 on 2000/07/17 by klaren@klaren.kronecker.main + + Another workspace for MSVC6 has support for dll's (for version 2.6.1). + + +Change 271 on 2000/07/17 by klaren@klaren.kronecker.main + + New autoconf/automake stuff for the C++ support library. + + +Change 270 on 2000/07/17 by klaren@klaren.kronecker.main + + Fixed error within the NO_STATIC_CONSTS #ifdef + + +Change 269 on 2000/07/17 by klaren@klaren.kronecker.main + + Move C++ files to lib/cpp/src as first step for autoconf setup + + +Change 268 on 2000/07/17 by klaren@klaren.kronecker.main + + Add contrib dir and Microsoft Visual C++ 6.0 projects supplied by John + Millaway + + +Change 260 on 2000/07/14 by klaren@klaren.kronecker.main + + Fixed crashbugs/typos in constructors of Mismatched[Token|Char]Exception + + +Change 258 on 2000/07/10 by parrt@parrt.foggy + + fixes per klaren + + +Change 258 on 2000/07/10 by parrt@parrt.foggy + + fixes per klaren + + +Change 248 on 2000/07/04 by parrt@parrt.foggy + + Ric Klaren's changes to C++ lib + + +Change 247 on 2000/07/04 by parrt@parrt.foggy + + Ric Klaren's changes for namespaces + + +Change 239 on 2000/06/03 by parrt@parrt.foggy + + adjusted so it works; header actions got converted to Token objects from + Strings; lots of cast problems and then null ptr exceptions. + +Change 235 on 2000/05/31 by pete@pete.linux + + More changes to support #line generation in C++ (from Ric Klaren) + +Change 220 on 2000/05/29 by parrt@parrt.foggy + + changed char to int for toLower + + +Change 219 on 2000/05/28 by pete@pete.linux + + Mirroring Java changes + + +Change 218 on 2000/05/28 by pete@pete.linux + + Cleaned up the #line generator a little. 
+ + +Change 211 on 2000/05/27 by parrt@parrt.foggy + + had same bug as JavaCodeGenerator related to ~(A|B) + + +Change 205 on 2000/05/24 by pete@pete.linux + + Add support for Metrowerks Codewarrior + + +Change 203 on 2000/05/22 by pete@pete.linux + + Fix for multithreading from Jan Mikkelsen + + +Change 202 on 2000/05/21 by pete@pete.linux + + Merged in some fixes from Ric Klaren for tracing TreeParsers, cleaner + namespace code, and #line generation. + + +Change 202 on 2000/05/21 by pete@pete.linux + + Merged in some fixes from Ric Klaren for tracing TreeParsers, cleaner + namespace code, and #line generation. + +Change 201 on 2000/05/21 by pete@pete.linux + + Added destructors with empty throw specs, as suggested by Dan Field. + + +Change 200 on 2000/05/21 by pete@pete.linux + + Various performance improvements, mostly from Eric Dumas. + + +Change 183 on 2000/02/08 by pete@pete.linux + + Added support for Sun CC 5.0 (from Michael Schmitt) + + +Change 182 on 2000/02/08 by pete@pete.linux + + Fix a couple of minor problems with C++ generation (noted by Michael + Schmitt) + +Change 132 on 2000/01/18 by parrt@parrt.foggy + + setting type to ktext for everything + + +Change 132 on 2000/01/18 by parrt@parrt.foggy + + setting type to ktext for everything + + +Change 131 on 2000/01/18 by parrt@parrt.foggy + + from dev back to main + + +Change 131 on 2000/01/18 by parrt@parrt.foggy + + from dev back to main + + +Change 1 on 1999/12/13 by parrt@parrt.foggy + + adding 2.6.0 from antlr site as initial main line + + diff --git a/poxml/antlr/INSTALL b/poxml/antlr/INSTALL new file mode 100644 index 00000000..30dd4d49 --- /dev/null +++ b/poxml/antlr/INSTALL @@ -0,0 +1,183 @@ +Basic Installation +================== + + These are generic installation instructions. Check out the README for +additional info. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. 
It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, a file +`config.cache' that saves the results of its tests to speed up +reconfiguring, and a file `config.log' containing compiler output +(useful mainly for debugging `configure'). + + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If at some point `config.cache' +contains results you don't want to keep, you may remove or edit it. + + The file `configure.in' is used to create `configure' by a program +called `autoconf'. You only need `configure.in' if you want to change +it or regenerate `configure' using a newer version of `autoconf'. + +The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. If you're + using `csh' on an old version of System V, you might need to type + `sh ./configure' instead to prevent `csh' from trying to execute + `configure' itself. + + Running `configure' takes awhile. While running, it prints some + messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package. + + 4. Type `make install' to install the programs and any data files and + documentation. + + 5. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. 
There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the `configure' script does not know about. You can give `configure' +initial values for variables by setting them in the environment. Using +a Bourne-compatible shell, you can do that on the command line like +this: + CC=c89 CFLAGS=-O2 LIBS=-lposix ./configure + +Or on systems that have the `env' program, you can do it like this: + env CPPFLAGS=-I/usr/local/include LDFLAGS=-s ./configure + +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you must use a version of `make' that +supports the `VPATH' variable, such as GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. + + If you have to use a `make' that does not supports the `VPATH' +variable, you have to compile the package for one architecture at a time +in the source code directory. After you have installed the package for +one architecture, use `make distclean' before reconfiguring for another +architecture. + +Installation Names +================== + + By default, `make install' will install the package's files in +`/usr/local/bin', `/usr/local/man', etc. You can specify an +installation prefix other than `/usr/local' by giving `configure' the +option `--prefix=PATH'. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. 
If you +give `configure' the option `--exec-prefix=PATH', the package will use +PATH as the prefix for installing programs and libraries. +Documentation and other data files will still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=PATH' to specify different values for particular +kinds of files. Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. + +Optional Features +================= + + Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + +Specifying the System Type +========================== + + There may be some features `configure' can not figure out +automatically, but needs to determine by the type of host the package +will run on. Usually `configure' can figure that out, but if it prints +a message saying it can not guess the host type, give it the +`--host=TYPE' option. TYPE can either be a short name for the system +type, such as `sun4', or a canonical name with three fields: + CPU-COMPANY-SYSTEM + +See the file `config.sub' for the possible values of each field. If +`config.sub' isn't included in this package, then this package doesn't +need to know the host type. 
+ + If you are building compiler tools for cross-compiling, you can also +use the `--target=TYPE' option to select the type of system they will +produce code for and the `--build=TYPE' option to select the type of +system on which you are compiling the package. + +Sharing Defaults +================ + + If you want to set default values for `configure' scripts to share, +you can create a site shell script called `config.site' that gives +default values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists. Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. +A warning: not all `configure' scripts look for a site script. + +Operation Controls +================== + + `configure' recognizes the following options to control how it +operates. + +`--cache-file=FILE' + Use and save the results of the tests in FILE instead of + `./config.cache'. Set FILE to `/dev/null' to disable caching, for + debugging `configure'. + +`--help' + Print a summary of the options to `configure', and exit. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to `/dev/null' (any error + messages will still be shown). + +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`--version' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`configure' also accepts some other, not widely useful, options. 
diff --git a/poxml/antlr/Makefile.am b/poxml/antlr/Makefile.am new file mode 100644 index 00000000..be459d64 --- /dev/null +++ b/poxml/antlr/Makefile.am @@ -0,0 +1,2 @@ +SUBDIRS = src antlr + diff --git a/poxml/antlr/README b/poxml/antlr/README new file mode 100644 index 00000000..6ca0913b --- /dev/null +++ b/poxml/antlr/README @@ -0,0 +1,72 @@ +C++ support libraries + +Original GNU autconf stuff contributed by Braden N. McDaniel. Slightly +hacked up by me (Ric Klaren (klaren@cs.utwente.nl)) for who it's the first +autoconf/automake/aclocal stuff ever, so suggestions additions welcome. + +HOW TO INSTALL + +In theory do: + +./configure --prefix=<basedirectory where you want it installed> +make +make install + +Now libantlr.a should reside under <basedir>/lib/libantlr.a and the +includes should be at <basedir>/include/antlr. + +NOTE: this only installs the C++ library and header files. + +In the contrib directory a dsp/dsw project for Microsoft Visual C++ can be +found. + +In general this library needs runtime typing information (RTTI) make sure +you enable this in whatever compiler you are using. + +SUPPORTED COMPILERS + +Pasted from the FAQ entry on: http://www.jguru.com/jguru/faq/view.jsp?EID=121 + +Compiler OS Version +------------------ --------------------- ---------- +Sun Workshop 4.2 Solaris 2.6, 7 2.7.1a2 +Sun Workshop 5.0 Solaris 2.7 2.7.1a2 +Sun Workshop 6.0 Solaris 2.7 2.7.1a2 +egcs-1.1.2 Solaris 2.6,7 2.7.1a2 +egcs-1.1.2 Linux 2.2, Solaris 2.6 2.7.1a2 +gcc-2.95.2 Linux 2.2, Solaris 2.6,7 2.7.1a2 +gcc-2.96 (20000527) Solaris 2.6 2.7.1a2 +aCC A.01.21 HP-UX 10.20 2.7.0 no! +Visual C++ 6.0 PC 2.7.1a2 (warnings) +Intel C++ 4.0 NT 4.0 2.7.0 +Borland 5.0 NT 4.0 2.7.0 + +IT DOESN'T WORK!? + +Check out the faq: http://www.jguru.com/jguru/faq/view.jsp?EID=120 + +The text of that entry (by Peter Wells): + +The ANTLR code uses some relatively new features of C++ which not all +compilers support yet (such as namespaces, and new style standard headers). 
+ +There is work currently in progress to provide a compatibility mode for +ANTLR, to enable older compilers to handle this. + +At the moment, you may be able to work around the problem with a few nasty +tricks: + +Try creating some header files like 'iostream' just containing: + +#include <iostream.h> + +and compile with an option to define away the word 'std', such as + +CC .... -Dstd= .... + +Also in the antlr subdirectory there's a file config.hpp. Tweak this one to +enable/disable the different bells and whistles used in the rest of the code. +Don't forget to submit those changes back to us (along with compiler info) +so we can incorporate them in our next release! + +Thanks! diff --git a/poxml/antlr/TODO b/poxml/antlr/TODO new file mode 100644 index 00000000..51d104c3 --- /dev/null +++ b/poxml/antlr/TODO @@ -0,0 +1,34 @@ +* Improve configure scripts => KICK OUT automake! + +* Add allocators to the objects + +* Look more at exception handling + +* TreeParser.cpp around line 76 the MismatchedTokenException here does not + use ttype to improve it's errormessage. Would require changing a bit in + MismatchedTokenException.cpp + +* On Thu, Sep 21, 2000 at 12:33:48AM -0700, John Lambert <JohnL@jBASE.com> wrote: + > 1) The literal EOF is not defined and causes the define of EOF_CHAR in + > CharScanner.hpp to fail. + + ANTLR with STL Port. Changing the EOF define to char_traits<char>::eof() + breaks things for gcc-2.95.2. Fix this in next release portably. + http://www.egroups.com/message/antlr-interest/2520 + +* John Millaway requested some mechanism to add code to the constructor + of the parser/lexer/treewalker. This can be usefull. + http://www.egroups.com/message/antlr-interest/2501 + +* Fix heterogeneous AST stuff. It boils down to adding a method to AST + types that knows how to duplicate the sucker. Atm duptree cannot work + because of this. Knowing one factory is not enough. Also look at having + to set the astfactory by hand (this is not 100% necessary). 
+ http://www.egroups.com/message/antlr-interest/2496 + +* Look at messageLog stuff Ross Bencina proposed. Looks good at first glance. + http://www.egroups.com/message/antlr-interest/2555 + +* Add RW_STL & CC 4.2 patch from Ulrich Teichert: + See my mailbox.. and these comments from Ross Bencina: + http://www.egroups.com/message/antlr-interest/2494 diff --git a/poxml/antlr/antlr/ANTLRException.hpp b/poxml/antlr/antlr/ANTLRException.hpp new file mode 100644 index 00000000..efbe0d7f --- /dev/null +++ b/poxml/antlr/antlr/ANTLRException.hpp @@ -0,0 +1,60 @@ +#ifndef INC_ANTLRException_hpp__ +#define INC_ANTLRException_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include <exception> +#include <string> + +ANTLR_BEGIN_NAMESPACE(antlr) + +class ANTLRException : public ANTLR_USE_NAMESPACE(std)exception { +private: + ANTLR_USE_NAMESPACE(std)string text; + +public: + ANTLRException(); + ANTLRException(const ANTLR_USE_NAMESPACE(std)string& s); + virtual ~ANTLRException() throw(); + + virtual ANTLR_USE_NAMESPACE(std)string toString() const; + + virtual ANTLR_USE_NAMESPACE(std)string getMessage() const; + + virtual const char* what() const throw(); +}; + +ANTLR_END_NAMESPACE + +#endif //INC_ANTLRException_hpp__ diff --git a/poxml/antlr/antlr/AST.hpp b/poxml/antlr/antlr/AST.hpp new file mode 100644 index 00000000..a36ffd15 --- /dev/null +++ b/poxml/antlr/antlr/AST.hpp @@ -0,0 +1,108 @@ +#ifndef INC_AST_hpp__ +#define INC_AST_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... 
If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/ASTRefCount.hpp" +#include "antlr/Token.hpp" +#include <vector> +#include <string> + +ANTLR_BEGIN_NAMESPACE(antlr) + +struct ASTRef; + +class AST { +public: + AST() : ref(0) {} + virtual ~AST() {} + + virtual void addChild(RefAST c)=0; + + virtual bool equals(RefAST t) const=0; + virtual bool equalsList(RefAST t) const=0; + virtual bool equalsListPartial(RefAST t) const=0; + virtual bool equalsTree(RefAST t) const=0; + virtual bool equalsTreePartial(RefAST t) const=0; + + virtual ANTLR_USE_NAMESPACE(std)vector<RefAST> findAll(RefAST t)=0; + virtual ANTLR_USE_NAMESPACE(std)vector<RefAST> findAllPartial(RefAST t)=0; + + /** Get the first child of this node; null if no children */ + virtual RefAST getFirstChild() const=0; + /** Get the next sibling in line after this one */ + virtual RefAST getNextSibling() const=0; + + /** Get the token text for this node */ + virtual ANTLR_USE_NAMESPACE(std)string getText() const=0; + /** Get the token type for this node */ + virtual int getType() const=0; + + virtual void initialize(int t,const ANTLR_USE_NAMESPACE(std)string& txt)=0; + virtual void initialize(RefAST t)=0; + virtual void initialize(RefToken t)=0; + + /** Set the first child of a node. 
*/ + virtual void setFirstChild(RefAST c)=0; + /** Set the next sibling after this one. */ + virtual void setNextSibling(RefAST n)=0; + + /** Set the token text for this node */ + virtual void setText(const ANTLR_USE_NAMESPACE(std)string& txt)=0; + /** Set the token type for this node */ + virtual void setType(int type)=0; + + virtual ANTLR_USE_NAMESPACE(std)string toString() const=0; + virtual ANTLR_USE_NAMESPACE(std)string toStringList() const=0; + virtual ANTLR_USE_NAMESPACE(std)string toStringTree() const=0; +private: + friend struct ASTRef; + ASTRef* ref; + + AST(const AST& other); + AST(RefAST other); + AST& operator=(const AST& other); + AST& operator=(RefAST other); +}; + +extern RefAST nullAST; +extern AST* const nullASTptr; + +#ifdef NEEDS_OPERATOR_LESS_THAN +inline operator<(RefAST l,RefAST r); // {return true;} +#endif + +ANTLR_END_NAMESPACE + +#endif //INC_AST_hpp__ diff --git a/poxml/antlr/antlr/ASTArray.hpp b/poxml/antlr/antlr/ASTArray.hpp new file mode 100644 index 00000000..5203acf0 --- /dev/null +++ b/poxml/antlr/antlr/ASTArray.hpp @@ -0,0 +1,63 @@ +#ifndef INC_ASTArray_hpp__ +#define INC_ASTArray_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... 
If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/AST.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** ASTArray is a class that allows ANTLR to + * generate code that can create and initialize an array + * in one expression, like: + * (new ASTArray(3))->add(x)->add(y)->add(z) + */ +class ASTArray { +public: + int size; // = 0; + ANTLR_USE_NAMESPACE(std)vector<RefAST> array; + + ASTArray(int capacity) + : size(0) + , array(capacity) + {} + ASTArray* add(RefAST node) { + array[size++] = node; + return this; + } +}; + +ANTLR_END_NAMESPACE + +#endif //INC_ASTArray_hpp__ diff --git a/poxml/antlr/antlr/ASTFactory.hpp b/poxml/antlr/antlr/ASTFactory.hpp new file mode 100644 index 00000000..584cee6d --- /dev/null +++ b/poxml/antlr/antlr/ASTFactory.hpp @@ -0,0 +1,113 @@ +#ifndef INC_ASTFactory_hpp__ +#define INC_ASTFactory_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. 
However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/AST.hpp" +#include "antlr/ASTArray.hpp" +#include "antlr/ASTPair.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** AST Support code shared by TreeParser and Parser. + * We use delegation to share code (and have only one + * bit of code to maintain) rather than subclassing + * or superclassing (forces AST support code to be + * loaded even when you don't want to do AST stuff). + * + * Typically, setASTNodeType is used to specify the + * type of node to create, but you can override + * create to make heterogeneous nodes etc... + */ +class ASTFactory { +public: + typedef RefAST (*factory_type)(); +protected: + /** Name of AST class to create during tree construction. + * Null implies that the create method should create + * a default AST type such as CommonAST. 
+ */ + factory_type nodeFactory; + +public: + ASTFactory(); + /** Add a child to the current AST */ + void addASTChild(ASTPair& currentAST, RefAST child); + /** Create a new empty AST node; if the user did not specify + * an AST node type, then create a default one: CommonAST. + */ + virtual RefAST create(); + RefAST create(int type); + RefAST create(int type, const ANTLR_USE_NAMESPACE(std)string& txt); + /** Create a new empty AST node; if the user did not specify + * an AST node type, then create a default one: CommonAST. + */ + RefAST create(RefAST tr); + RefAST create(RefToken tok); + /** Copy a single node. clone() is not used because + * we want to return an AST not a plain object...a type + * safety issue. Further, we want to have all AST node + * creation go through the factory so creation can be + * tracked. Returns null if t is null. + */ + RefAST dup(RefAST t); + /** Duplicate tree including siblings of root. */ + RefAST dupList(RefAST t); + /**Duplicate a tree, assuming this is a root node of a tree-- + * duplicate that node and what's below; ignore siblings of root node. + */ + RefAST dupTree(RefAST t); + /** Make a tree from a list of nodes. The first element in the + * array is the root. If the root is null, then the tree is + * a simple list not a tree. Handles null children nodes correctly. + * For example, build(a, b, null, c) yields tree (a b c). build(null,a,b) + * yields tree (nil a b). 
+ */ + RefAST make(ANTLR_USE_NAMESPACE(std)vector<RefAST> nodes); + /** Make a tree from a list of nodes, where the nodes are contained + * in an ASTArray object + */ + RefAST make(ASTArray* nodes); + /** Make an AST the root of current AST */ + void makeASTRoot(ASTPair& currentAST, RefAST root); + void setASTNodeFactory(factory_type factory); + virtual ~ASTFactory() {} +private: + ASTFactory( const ASTFactory& ); + ASTFactory& operator=( const ASTFactory& ); +}; + +ANTLR_END_NAMESPACE + +#endif //INC_ASTFactory_hpp__ diff --git a/poxml/antlr/antlr/ASTNULLType.hpp b/poxml/antlr/antlr/ASTNULLType.hpp new file mode 100644 index 00000000..8f3faa46 --- /dev/null +++ b/poxml/antlr/antlr/ASTNULLType.hpp @@ -0,0 +1,72 @@ +#ifndef INC_ASTNULLType_hpp__ +#define INC_ASTNULLType_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/AST.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** There is only one instance of this class **/ +class ASTNULLType : public AST { +public: + void addChild(RefAST c) {;} + bool equals(RefAST t) const {return false;} + bool equalsList(RefAST t) const {return false;} + bool equalsListPartial(RefAST t) const {return false;} + bool equalsTree(RefAST t) const {return false;} + bool equalsTreePartial(RefAST t) const {return false;} + ANTLR_USE_NAMESPACE(std)vector<RefAST> findAll(RefAST tree) + {return ANTLR_USE_NAMESPACE(std)vector<RefAST>();} + ANTLR_USE_NAMESPACE(std)vector<RefAST> findAllPartial(RefAST subtree) + {return ANTLR_USE_NAMESPACE(std)vector<RefAST>();} + RefAST getFirstChild() const { return this; } + RefAST getNextSibling() const { return this; } + ANTLR_USE_NAMESPACE(std)string getText() const { return "<ASTNULL>"; } + int getType() const { return Token::NULL_TREE_LOOKAHEAD; } + void initialize(int t, const ANTLR_USE_NAMESPACE(std)string& txt) {} + void initialize(RefAST t) {} + void initialize(RefToken t) {} + void setFirstChild(RefAST c) {;} + void setNextSibling(RefAST n) {;} + void setText(const ANTLR_USE_NAMESPACE(std)string& text) {;} + void setType(int ttype) {;} + ANTLR_USE_NAMESPACE(std)string toString() const {return getText();} + ANTLR_USE_NAMESPACE(std)string toStringList() const {return getText();} + ANTLR_USE_NAMESPACE(std)string toStringTree() const {return getText();} +}; + +ANTLR_END_NAMESPACE + +#endif //INC_ASTNULLType_hpp__ diff --git a/poxml/antlr/antlr/ASTPair.hpp b/poxml/antlr/antlr/ASTPair.hpp new file mode 100644 index 00000000..eb7629ba --- /dev/null +++ 
b/poxml/antlr/antlr/ASTPair.hpp @@ -0,0 +1,77 @@ +#ifndef INC_ASTPair_hpp__ +#define INC_ASTPair_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ +#include "antlr/config.hpp" +#include "antlr/AST.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** ASTPair: utility class used for manipulating a pair of ASTs + * representing the current AST root and current AST sibling. + * This exists to compensate for the lack of pointers or 'var' + * arguments in Java. + * + * OK, so we can do those things in C++, but it seems easier + * to stick with the Java way for now. 
+ */ +class ASTPair { +public: + RefAST root; // current root of tree + RefAST child; // current child to which siblings are added + + /** Make sure that child is the last sibling */ + void advanceChildToEnd() { + if (child) { + while (child->getNextSibling()) { + child = child->getNextSibling(); + } + } + } +// /** Copy an ASTPair. Don't call it clone() because we want type-safety */ +// ASTPair copy() { +// ASTPair tmp = new ASTPair(); +// tmp.root = root; +// tmp.child = child; +// return tmp; +// } + ANTLR_USE_NAMESPACE(std)string toString() const { + ANTLR_USE_NAMESPACE(std)string r = !root ? ANTLR_USE_NAMESPACE(std)string("null") : root->getText(); + ANTLR_USE_NAMESPACE(std)string c = !child ? ANTLR_USE_NAMESPACE(std)string("null") : child->getText(); + return "["+r+","+c+"]"; + } +}; + +ANTLR_END_NAMESPACE + +#endif //INC_ASTPair_hpp__ diff --git a/poxml/antlr/antlr/ASTRefCount.hpp b/poxml/antlr/antlr/ASTRefCount.hpp new file mode 100644 index 00000000..cb44128b --- /dev/null +++ b/poxml/antlr/antlr/ASTRefCount.hpp @@ -0,0 +1,104 @@ +#ifndef INC_ASTRefCount_hpp__ +# define INC_ASTRefCount_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... 
class AST;

/** Bookkeeping record shared by all ASTRefCount handles that refer to the
 *  same AST node: the raw node pointer plus a use count.
 *
 *  The member functions are only declared in this header.  From the way
 *  ASTRefCount uses them: increment() returns the record to store in a new
 *  handle, and decrement() returns true when the caller is expected to
 *  delete the record.
 *  NOTE(review): presumably decrement() reports true once the count drops
 *  to zero and ~ASTRef() destroys the node -- confirm in the .cpp file.
 */
struct ASTRef
{
	AST* const ptr;
	unsigned int count;

	ASTRef(AST* p);
	~ASTRef();
	ASTRef* increment();
	bool decrement();

	/** Obtain the shared record for node p (defined out of line). */
	static ASTRef* getRef(const AST* p);
private:
	// Not copyable: declared but intentionally left private.
	ASTRef( const ASTRef& );
	ASTRef& operator=( const ASTRef& );
};

/** Reference-counted handle to an AST node, viewed as a T*.
 *
 *  A default-constructed or null-assigned handle holds no record and
 *  converts to a null pointer.
 */
template<class T>
	class ASTRefCount
{
private:
	ASTRef* ref;

	/** Give up this handle's share of the record; delete the record when
	 *  decrement() says so.  Leaves `ref` untouched -- callers overwrite it
	 *  or are being destroyed.
	 */
	void release()
	{
		if (ref && ref->decrement())
			delete ref;
	}

public:
	/** Wrap a raw node pointer; a null pointer yields an empty handle. */
	ASTRefCount(const AST* p=0)
	: ref(p ? ASTRef::getRef(p) : 0)
	{
	}
	/** Share the record held by another handle (if any). */
	ASTRefCount(const ASTRefCount<T>& other)
	: ref(other.ref ? other.ref->increment() : 0)
	{
	}
	~ASTRefCount()
	{
		release();
	}

	/** Re-seat the handle on a raw node pointer.
	 *
	 *  A null pointer now produces an empty handle without calling
	 *  ASTRef::getRef, mirroring the guard in the constructor.  The new
	 *  record is acquired before the old one is released so that assigning
	 *  the node already held is safe.
	 */
	ASTRefCount<T>& operator=(AST* other)
	{
		ASTRef* tmp = other ? ASTRef::getRef(other) : 0;
		release();
		ref=tmp;
		return *this;
	}
	/** Share another handle's record.  Acquire-then-release ordering makes
	 *  self-assignment safe.
	 */
	ASTRefCount<T>& operator=(const ASTRefCount<T>& other)
	{
		ASTRef* tmp=other.ref ? other.ref->increment() : 0;
		release();
		ref=tmp;
		return *this;
	}

	/** Raw node pointer cast to T*, or null for an empty handle. */
	T* get() const
		{ return ref ? static_cast<T*>(ref->ptr) : 0; }
	operator T* () const
		{ return get(); }
	T* operator->() const
		{ return get(); }
};

typedef ASTRefCount<AST> RefAST;
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/AST.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +class BaseAST; +typedef ASTRefCount<BaseAST> RefBaseAST; + +class BaseAST : public AST { +public: + BaseAST() : AST() {} + virtual ~BaseAST() {} +protected: + RefBaseAST down; + RefBaseAST right; + +//private: +// static bool verboseStringConversion; +// static ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string> tokenNames; + +public: + virtual void addChild(RefAST c); + +private: + void doWorkForFindAll(ANTLR_USE_NAMESPACE(std)vector<RefAST>& v, + RefAST target,bool partialMatch); + +public: + virtual bool equals(RefAST t) const; + virtual bool equalsList(RefAST t) const; + virtual bool equalsListPartial(RefAST t) const; + virtual bool equalsTree(RefAST t) const; + virtual bool equalsTreePartial(RefAST t) const; + + virtual ANTLR_USE_NAMESPACE(std)vector<RefAST> findAll(RefAST t); + virtual ANTLR_USE_NAMESPACE(std)vector<RefAST> findAllPartial(RefAST t); + + /** Get the first child of this node; null if no children */ + virtual RefAST getFirstChild() const; + /** Get the next sibling in line after this one */ + virtual RefAST getNextSibling() const; + + /** Get the token text for this node */ + virtual ANTLR_USE_NAMESPACE(std)string getText() const; + /** Get the token type for this node */ + virtual int getType() const; + + /** Remove all children */ + virtual void removeChildren(); + + /** Set the first child of a node. */ + virtual void setFirstChild(RefAST c); + /** Set the next sibling after this one. 
*/ + void setNextSibling(RefAST n); + + /** Set the token text for this node */ + virtual void setText(const ANTLR_USE_NAMESPACE(std)string& txt); + /** Set the token type for this node */ + virtual void setType(int type); + +// static void setVerboseStringConversion(bool verbose, +// const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& names); + + virtual ANTLR_USE_NAMESPACE(std)string toString() const; + virtual ANTLR_USE_NAMESPACE(std)string toStringList() const; + virtual ANTLR_USE_NAMESPACE(std)string toStringTree() const; +}; + +ANTLR_END_NAMESPACE + +#endif //INC_BaseAST_hpp__ diff --git a/poxml/antlr/antlr/BitSet.hpp b/poxml/antlr/antlr/BitSet.hpp new file mode 100644 index 00000000..4eb400c7 --- /dev/null +++ b/poxml/antlr/antlr/BitSet.hpp @@ -0,0 +1,50 @@ +#ifndef INC_BitSet_hpp__ +#define INC_BitSet_hpp__ + +#include "antlr/config.hpp" +#include <vector> +#include <stdexcept> + +ANTLR_BEGIN_NAMESPACE(antlr) + +/**A BitSet to replace java.util.BitSet. + * Primary differences are that most set operators return new sets + * as opposed to oring and anding "in place". Further, a number of + * operations were added. I cannot contain a BitSet because there + * is no way to access the internal bits (which I need for speed) + * and, because it is final, I cannot subclass to add functionality. + * Consider defining set degree. Without access to the bits, I must + * call a method n times to test the ith bit...ack! + * + * Also seems like or() from util is wrong when size of incoming set is bigger + * than this.length. + * + * + * This is a C++ version of the Java class described above, with only + * a handful of the methods implemented, because we don't need the + * others at runtime. It's really just a wrapper around vector<bool>, + * which should probably be changed to a wrapper around bitset, once + * bitset is more widely available. 
+ * + * @author Terence Parr, MageLang Institute + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ +class BitSet { +private: + ANTLR_USE_NAMESPACE(std)vector<bool> storage; + +public: + BitSet(int nbits=64); + BitSet(const unsigned long* bits_,int nlongs); + ~BitSet(); + + void add(int el); + + bool member(int el) const; + + ANTLR_USE_NAMESPACE(std)vector<int> toArray() const; +}; + +ANTLR_END_NAMESPACE + +#endif //INC_BitSet_hpp__ diff --git a/poxml/antlr/antlr/CharBuffer.hpp b/poxml/antlr/antlr/CharBuffer.hpp new file mode 100644 index 00000000..45d467bb --- /dev/null +++ b/poxml/antlr/antlr/CharBuffer.hpp @@ -0,0 +1,75 @@ +#ifndef INC_CharBuffer_hpp__ +#define INC_CharBuffer_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +/**A Stream of characters fed to the lexer from a InputStream that can + * be rewound via mark()/rewind() methods. + * <p> + * A dynamic array is used to buffer up all the input characters. Normally, + * "k" characters are stored in the buffer. More characters may be stored during + * guess mode (testing syntactic predicate), or when LT(i>k) is referenced. + * Consumption of characters is deferred. In other words, reading the next + * character is not done by conume(), but deferred until needed by LA or LT. + * <p> + * + * @see antlr.CharQueue + */ + +#include "antlr/config.hpp" +#include "antlr/InputBuffer.hpp" +#include <iostream> + +ANTLR_BEGIN_NAMESPACE(antlr) + +class CharBuffer : public InputBuffer { +private: + // char source + ANTLR_USE_NAMESPACE(std)istream& input; + +public: + /** Create a character buffer */ + CharBuffer(ANTLR_USE_NAMESPACE(std)istream& input_); + + /** Get the next character from the stream */ + int getChar(); + +private: +// Not implemented. +// CharBuffer(const CharBuffer& other); +// CharBuffer& operator=(const CharBuffer& other); +}; + +ANTLR_END_NAMESPACE + +#endif //INC_CharBuffer_hpp__ diff --git a/poxml/antlr/antlr/CharScanner.hpp b/poxml/antlr/antlr/CharScanner.hpp new file mode 100644 index 00000000..b0ab9276 --- /dev/null +++ b/poxml/antlr/antlr/CharScanner.hpp @@ -0,0 +1,265 @@ +#ifndef INC_CharScanner_hpp__ +#define INC_CharScanner_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * $Id$ + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. 
An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/TokenStream.hpp" +#include "antlr/RecognitionException.hpp" +#include "antlr/InputBuffer.hpp" +#include "antlr/BitSet.hpp" +#include "antlr/LexerSharedInputState.hpp" +#include <map> +#include <cstdio> + +ANTLR_BEGIN_NAMESPACE(antlr) + +class CharScanner; + +class CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> { +private: + const CharScanner* scanner; +public: +#ifdef NO_TEMPLATE_PARTS + CharScannerLiteralsLess(); // not really used +#endif + CharScannerLiteralsLess(const CharScanner* theScanner); + bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const 
ANTLR_USE_NAMESPACE(std)string& y) const; +private: +// CharScannerLiteralsLess(const CharScannerLiteralsLess&); +// CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&); +}; + +class CharScanner : public TokenStream { +private: +#ifndef NO_STATIC_CONSTS + static const int NO_CHAR = 0; +#else + enum { + NO_CHAR = 0 + }; +#endif + +public: +#ifndef NO_STATIC_CONSTS + static const int EOF_CHAR = EOF; +#else + enum { + EOF_CHAR = EOF + }; +#endif + +protected: + ANTLR_USE_NAMESPACE(std)string text; // text of current token + + bool saveConsumedInput; // does consume() save characters? + + typedef RefToken (*factory_type)(); + factory_type tokenFactory; // what kind of tokens to create? + + bool caseSensitive; + ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass + + RefToken _returnToken; // used to return tokens w/o using return val + + // Input chars + LexerSharedInputState inputState; + + /** Used during filter mode to indicate that path is desired. 
+ * A subsequent scan error will report an error as usual if acceptPath=true; + */ + bool commitToPath; + +public: + CharScanner(); + + CharScanner(InputBuffer& cb); + CharScanner(InputBuffer* cb); + + CharScanner(const LexerSharedInputState& state); + + virtual ~CharScanner(); + + virtual void append(char c); + + virtual void append(const ANTLR_USE_NAMESPACE(std)string& s); + + virtual void commit(); + + virtual void consume(); + + /** Consume chars until one matches the given char */ + virtual void consumeUntil(int c); + + /** Consume chars until one matches the given set */ + virtual void consumeUntil(const BitSet& set); + + virtual bool getCaseSensitive() const; + + virtual bool getCaseSensitiveLiterals() const=0; + + virtual int getColumn() const; + + virtual void setColumn(int c); + + virtual bool getCommitToPath() const; + + virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const; + + virtual InputBuffer& getInputBuffer(); + + virtual LexerSharedInputState getInputState(); + + virtual int getLine() const; + + /** return a copy of the current text buffer */ + virtual const ANTLR_USE_NAMESPACE(std)string& getText() const; + + virtual RefToken getTokenObject() const; + + virtual int LA(int i); + +protected: + virtual RefToken makeToken(int t); + +public: + virtual int mark(); + + virtual void match(int c); + + virtual void match(const BitSet& b); + + virtual void match(const ANTLR_USE_NAMESPACE(std)string& s); + + virtual void matchNot(int c); + + virtual void matchRange(int c1, int c2); + + virtual void newline(); + + virtual void tab(); + + void panic(); + + void panic(const ANTLR_USE_NAMESPACE(std)string& s); + + /** Report exception errors caught in nextToken() */ + virtual void reportError(const RecognitionException& e); + + /** Parser error-reporting function can be overridden in subclass */ + virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s); + + /** Parser warning-reporting function can be overridden in subclass */ + virtual 
void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s); + + virtual void resetText(); + + virtual void rewind(int pos); + + virtual void setCaseSensitive(bool t); + + virtual void setCommitToPath(bool commit); + + virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f); + + virtual void setInputState(LexerSharedInputState state); + + virtual void setLine(int l); + + virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s); + + virtual void setTokenObjectFactory(factory_type factory); + + // Test the token text against the literals table + // Override this method to perform a different literals test + virtual int testLiteralsTable(int ttype) const; + + // Test the text passed in against the literals table + // Override this method to perform a different literals test + // This is used primarily when you want to test a portion of + // a token + virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& text,int ttype) const; + + // Override this method to get more specific case handling + virtual int toLower(int c) const; + +protected: + class Tracer { + private: + CharScanner* parser; + ANTLR_USE_NAMESPACE(std)string text; + public: + Tracer(CharScanner* p,const ANTLR_USE_NAMESPACE(std)string& t) + : parser(p), text(t) { parser->traceIn(text); } + ~Tracer() + { parser->traceOut(text); } + }; + + int traceDepth; +public: + virtual void traceIndent(); + virtual void traceIn(const ANTLR_USE_NAMESPACE(std)string& rname); + virtual void traceOut(const ANTLR_USE_NAMESPACE(std)string& rname); + + /* This method is called by YourLexer::nextToken() when the lexer has + * hit EOF condition. EOF is NOT a character. + * This method is not called if EOF is reached during + * syntactic predicate evaluation or during evaluation + * of normal lexical rules, which presumably would be + * an IOException. This traps the "normal" EOF condition. 
+ * + * uponEOF() is called after the complete evaluation of + * the previous token and only if your parser asks + * for another token beyond that last non-EOF token. + * + * You might want to throw token or char stream exceptions + * like: "Heh, premature eof" or a retry stream exception + * ("I found the end of this file, go back to referencing file"). + */ + virtual void uponEOF(); +}; + +inline int CharScanner::LA(int i) +{ + if ( caseSensitive ) { + return inputState->getInput().LA(i); + } else { + return toLower(inputState->getInput().LA(i)); + } +} + +ANTLR_END_NAMESPACE + +#endif //INC_CharScanner_hpp__ diff --git a/poxml/antlr/antlr/CharStreamException.hpp b/poxml/antlr/antlr/CharStreamException.hpp new file mode 100644 index 00000000..33f52061 --- /dev/null +++ b/poxml/antlr/antlr/CharStreamException.hpp @@ -0,0 +1,18 @@ +#ifndef INC_CharStreamException_hpp__ +#define INC_CharStreamException_hpp__ + +#include "antlr/config.hpp" +#include "antlr/ANTLRException.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +class CharStreamException : public ANTLRException { +public: + CharStreamException(const ANTLR_USE_NAMESPACE(std)string& s) + : ANTLRException(s) {} + ~CharStreamException() throw() {} +}; + +ANTLR_END_NAMESPACE + +#endif //INC_CharStreamException_hpp__ diff --git a/poxml/antlr/antlr/CharStreamIOException.hpp b/poxml/antlr/antlr/CharStreamIOException.hpp new file mode 100644 index 00000000..1a8b1d1e --- /dev/null +++ b/poxml/antlr/antlr/CharStreamIOException.hpp @@ -0,0 +1,20 @@ +#ifndef INC_CharStreamIOException_hpp__ +#define INC_CharStreamIOException_hpp__ + +#include "antlr/config.hpp" +#include "antlr/CharStreamException.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +class CharStreamIOException : public CharStreamException { +public: + ANTLR_USE_NAMESPACE(std)exception io; + + CharStreamIOException(ANTLR_USE_NAMESPACE(std)exception& e) + : CharStreamException(e.what()), io(e) {} + ~CharStreamIOException() throw() {} +}; + +ANTLR_END_NAMESPACE + +#endif 
//INC_CharStreamIOException_hpp__ diff --git a/poxml/antlr/antlr/CircularQueue.hpp b/poxml/antlr/antlr/CircularQueue.hpp new file mode 100644 index 00000000..eadf8d42 --- /dev/null +++ b/poxml/antlr/antlr/CircularQueue.hpp @@ -0,0 +1,88 @@ +#ifndef INC_CircularQueue_hpp__ +#define INC_CircularQueue_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include <vector> + +ANTLR_BEGIN_NAMESPACE(antlr) + +// Resize every 5000 items +#define OFFSET_MAX_RESIZE 5000 + +template <class T> +class CircularQueue { +private: + ANTLR_USE_NAMESPACE(std)vector<T> storage; + +public: + CircularQueue() + : storage(), m_offset(0) {} + ~CircularQueue() + {} + + T elementAt(int idx) const + { return storage[idx+m_offset]; } //Is this safe? + void removeFirst() + { + if (m_offset >= OFFSET_MAX_RESIZE) { + storage.erase( storage.begin(), storage.begin() + m_offset + 1 ); + m_offset = 0; + } else { + ++m_offset; + } + } + inline void removeItems( int nb ) + { + if (m_offset >= OFFSET_MAX_RESIZE) { + storage.erase( storage.begin(), storage.begin() + m_offset + nb ); + m_offset = 0; + } else { + m_offset+=nb; + } + } + void append(const T& t) + { storage.push_back(t); } + int entries() const + { return storage.size()-m_offset; } + +private: + int m_offset; + CircularQueue(const CircularQueue&); + const CircularQueue& operator=(const CircularQueue&); +}; + +ANTLR_END_NAMESPACE + +#endif //INC_CircularQueue_hpp__ diff --git a/poxml/antlr/antlr/CommonAST.hpp b/poxml/antlr/antlr/CommonAST.hpp new file mode 100644 index 00000000..c7ab7313 --- /dev/null +++ b/poxml/antlr/antlr/CommonAST.hpp @@ -0,0 +1,68 @@ +#ifndef INC_CommonAST_hpp__ +#define INC_CommonAST_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. 
An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/BaseAST.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +class CommonAST : public BaseAST { +public: + CommonAST(); + CommonAST(RefToken t); + virtual ~CommonAST(); + + virtual ANTLR_USE_NAMESPACE(std)string getText() const; + virtual int getType() const; + + virtual void initialize(int t,const ANTLR_USE_NAMESPACE(std)string& txt); + virtual void initialize(RefAST t); + virtual void initialize(RefToken t); + + virtual void setText(const ANTLR_USE_NAMESPACE(std)string& txt); + virtual void setType(int type); + + static RefAST factory(); + +protected: + int ttype; + ANTLR_USE_NAMESPACE(std)string text; +}; + +typedef ASTRefCount<CommonAST> RefCommonAST; + 
+ANTLR_END_NAMESPACE + +#endif //INC_CommonAST_hpp__ diff --git a/poxml/antlr/antlr/CommonASTWithHiddenTokens.hpp b/poxml/antlr/antlr/CommonASTWithHiddenTokens.hpp new file mode 100644 index 00000000..11e030e7 --- /dev/null +++ b/poxml/antlr/antlr/CommonASTWithHiddenTokens.hpp @@ -0,0 +1,41 @@ +#ifndef INC_CommonASTWithHiddenTokens_hpp__ +#define INC_CommonASTWithHiddenTokens_hpp__ + +/** A CommonAST whose initialization copies hidden token + * information from the Token used to create a node. + */ + +#include "antlr/config.hpp" +#include "antlr/CommonAST.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +class CommonASTWithHiddenTokens : public CommonAST { +public: + CommonASTWithHiddenTokens() : CommonAST() + { + } + virtual ~CommonASTWithHiddenTokens() + { + } +protected: + RefToken hiddenBefore,hiddenAfter; // references to hidden tokens +public: + virtual RefToken getHiddenAfter() const + { return hiddenAfter; } + virtual RefToken getHiddenBefore() const + { return hiddenBefore; } + + // Borland C++ builder seems to need the decl's of the first two... 
+ virtual void initialize(int t,const ANTLR_USE_NAMESPACE(std)string& txt); + virtual void initialize(RefAST t); + virtual void initialize(RefToken t); + + static RefAST factory(); +}; + +typedef ASTRefCount<CommonASTWithHiddenTokens> RefCommonASTWithHiddenTokens; + +ANTLR_END_NAMESPACE + +#endif //INC_CommonASTWithHiddenTokens_hpp__ diff --git a/poxml/antlr/antlr/CommonHiddenStreamToken.hpp b/poxml/antlr/antlr/CommonHiddenStreamToken.hpp new file mode 100644 index 00000000..50ff2354 --- /dev/null +++ b/poxml/antlr/antlr/CommonHiddenStreamToken.hpp @@ -0,0 +1,30 @@ +#ifndef INC_CommonHiddenStreamToken_hpp__ +#define INC_CommonHiddenStreamToken_hpp__ + +#include "antlr/config.hpp" +#include "antlr/CommonToken.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +class CommonHiddenStreamToken : public CommonToken { +protected: + RefToken hiddenBefore; + RefToken hiddenAfter; + +public: + CommonHiddenStreamToken(); + CommonHiddenStreamToken(int t, const ANTLR_USE_NAMESPACE(std)string& txt); + CommonHiddenStreamToken(const ANTLR_USE_NAMESPACE(std)string& s); + + RefToken getHiddenAfter(); + RefToken getHiddenBefore(); + + static RefToken factory(); + + void setHiddenAfter(RefToken t); + void setHiddenBefore(RefToken t); +}; + +ANTLR_END_NAMESPACE + +#endif //INC_CommonHiddenStreamToken_hpp__ diff --git a/poxml/antlr/antlr/CommonToken.hpp b/poxml/antlr/antlr/CommonToken.hpp new file mode 100644 index 00000000..669aa535 --- /dev/null +++ b/poxml/antlr/antlr/CommonToken.hpp @@ -0,0 +1,77 @@ +#ifndef INC_CommonToken_hpp__ +#define INC_CommonToken_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. 
+ * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/Token.hpp" +#include <string> + +ANTLR_BEGIN_NAMESPACE(antlr) + +class CommonToken : public Token { +protected: + // most tokens will want line and text information + int line; + int col; + ANTLR_USE_NAMESPACE(std)string text; + +public: + CommonToken(); + CommonToken(int t, const ANTLR_USE_NAMESPACE(std)string& txt); + CommonToken(const ANTLR_USE_NAMESPACE(std)string& s); + + int getLine() const; + ANTLR_USE_NAMESPACE(std)string getText() const; + void setLine(int l); + void setText(const ANTLR_USE_NAMESPACE(std)string& s); + + ANTLR_USE_NAMESPACE(std)string toString() const; + + /** Return token's start column */ + int getColumn() const; + + void setColumn(int c); + + bool isInvalid() const; + + static RefToken factory(); + +private: + CommonToken(const CommonToken&); + const CommonToken& operator=(const CommonToken&); +}; + +ANTLR_END_NAMESPACE + +#endif 
//INC_CommonToken_hpp__ diff --git a/poxml/antlr/antlr/InputBuffer.hpp b/poxml/antlr/antlr/InputBuffer.hpp new file mode 100644 index 00000000..96e62191 --- /dev/null +++ b/poxml/antlr/antlr/InputBuffer.hpp @@ -0,0 +1,158 @@ +#ifndef INC_InputBuffer_hpp__ +#define INC_InputBuffer_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +/**A Stream of characters fed to the lexer from a InputStream that can + * be rewound via mark()/rewind() methods. + * <p> + * A dynamic array is used to buffer up all the input characters. Normally, + * "k" characters are stored in the buffer. 
More characters may be stored during + * guess mode (testing syntactic predicate), or when LT(i>k) is referenced. + * Consumption of characters is deferred. In other words, reading the next + * character is not done by conume(), but deferred until needed by LA or LT. + * <p> + * + * @see antlr.CharQueue + */ + +#include "antlr/config.hpp" +#include "antlr/CircularQueue.hpp" +#include <string> + +ANTLR_BEGIN_NAMESPACE(antlr) + +class InputBuffer { +protected: + // char source + // leave to subclasses + + // Number of active markers + int nMarkers; // = 0; + + // Additional offset used when markers are active + int markerOffset; // = 0; + + // Number of calls to consume() since last LA() or LT() call + int numToConsume; // = 0; + + // Circular queue + CircularQueue<int> queue; + +public: + /** Create a character buffer */ + InputBuffer(); + + virtual ~InputBuffer() {} + + /** This method updates the state of the input buffer so that + * the text matched since the most recent mark() is no longer + * held by the buffer. So, you either do a mark/rewind for + * failed predicate or mark/commit to keep on parsing without + * rewinding the input. + */ + void commit(); + + /** Mark another character for deferred consumption */ + virtual void consume(); + + /** Ensure that the character buffer is sufficiently full */ + virtual void fill(int amount); + + /** Override this in subclasses to get the next character */ + virtual int getChar()=0; + /* NOTE(review): getLAChars, getMarkedChars and isMarked are only declared here; judging by their names they report the lookahead text, the text since the last mark() and whether a marker is active - confirm against the implementation file. */ + ANTLR_USE_NAMESPACE(std)string getLAChars() const; + + ANTLR_USE_NAMESPACE(std)string getMarkedChars() const; + + virtual bool isMarked() const; + + /** Get a lookahead character; i is 1-based (the inline definition after the class reads queue index markerOffset + i - 1) */ + virtual int LA(int i); + + /**Return an integer marker that can be used to rewind the buffer to + * its current state. + */ + virtual int mark(); + + /**Rewind the character buffer to a marker. 
+ * @param mark Marker returned previously from mark() + */ + virtual void rewind(int mark); + +protected: + /** Sync up deferred consumption */ + void syncConsume(); + +private: + InputBuffer(const InputBuffer& other); + InputBuffer& operator=(const InputBuffer& other); +}; + +/** Sync up deferred consumption: applies all numToConsume pending consume() calls. While markers are active (nMarkers > 0) the characters stay in the queue and markerOffset is advanced instead; otherwise the characters are removed from the queue. numToConsume ends at 0. The OLD_CODE section is the earlier one-character-at-a-time loop and is compiled only when OLD_CODE is defined. */ +inline void InputBuffer::syncConsume() { +#ifdef OLD_CODE + while (numToConsume > 0) { + if (nMarkers > 0) + { + // guess mode -- leave leading characters and bump offset. + markerOffset++; + } else { + // normal mode -- remove first character + queue.removeFirst(); + } + numToConsume--; + } +#endif + /* Batched form: apply the whole pending count in one step instead of looping. */ + if (numToConsume > 0) { + if (nMarkers > 0) { + markerOffset += numToConsume; + } else { + queue.removeItems( numToConsume ); + } + numToConsume = 0; + } +} + +/** Get a lookahead character: fill(i) is called first, then the queue element at markerOffset + i - 1 is returned, so i == 1 reads the element at markerOffset. */ +inline int InputBuffer::LA(int i) +{ + fill(i); + return queue.elementAt(markerOffset + i - 1); +} + +ANTLR_END_NAMESPACE + +#endif //INC_InputBuffer_hpp__ diff --git a/poxml/antlr/antlr/LLkParser.hpp b/poxml/antlr/antlr/LLkParser.hpp new file mode 100644 index 00000000..8b8db188 --- /dev/null +++ b/poxml/antlr/antlr/LLkParser.hpp @@ -0,0 +1,82 @@ +#ifndef INC_LLkParser_hpp__ +#define INC_LLkParser_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. 
By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/Parser.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/**An LL(k) parser. + * + * @see antlr.Token + * @see antlr.TokenBuffer + * @see antlr.LL1Parser + */ +class LLkParser : public Parser { +protected: + int k; + /* NOTE(review): k is presumably the lookahead depth handed to the constructors as k_; the constructor bodies are not in this header, so confirm. */ +public: +// LLkParser(int k_); + + LLkParser(const ParserSharedInputState& lexer, int k_); + + LLkParser(TokenBuffer& tokenBuf, int k_); + + LLkParser(TokenStream& lexer, int k_); + + /**Consume another token from the input stream. Can only write sequentially! + * If you need 3 tokens ahead, you must consume() 3 times. + * <p> + * Note that it is possible to overwrite tokens that have not been matched. + * For example, calling consume() 3 times when k=2, means that the first token + * consumed will be overwritten with the 3rd. 
+ */ + void consume(); + /* Lookahead accessors for position i: LA is declared to return an int, LT a RefToken. */ + int LA(int i); + + RefToken LT(int i); + +private: + void trace(const ANTLR_USE_NAMESPACE(std)string& ee, const ANTLR_USE_NAMESPACE(std)string& rname); +public: + void traceIn(const ANTLR_USE_NAMESPACE(std)string& rname); + void traceOut(const ANTLR_USE_NAMESPACE(std)string& rname); +}; + +ANTLR_END_NAMESPACE + +#endif //INC_LLkParser_hpp__ diff --git a/poxml/antlr/antlr/LexerSharedInputState.hpp b/poxml/antlr/antlr/LexerSharedInputState.hpp new file mode 100644 index 00000000..dba2a5f4 --- /dev/null +++ b/poxml/antlr/antlr/LexerSharedInputState.hpp @@ -0,0 +1,49 @@ +#ifndef INC_LexerSharedInputState_hpp__ +#define INC_LexerSharedInputState_hpp__ + +#include "antlr/config.hpp" +#include "antlr/InputBuffer.hpp" +#include "antlr/RefCount.hpp" +#include <string> + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** This object contains the data associated with an + * input stream of characters. Multiple lexers + * share a single LexerSharedInputState to lex + * the same input stream. + */ +class LexerInputState { +public: + LexerInputState(InputBuffer* inbuf); + LexerInputState(InputBuffer& inbuf); + LexerInputState(ANTLR_USE_NAMESPACE(std)istream& in); + ~LexerInputState(); + /* Position and guessing bookkeeping, kept as public data members. */ + int column; + int line; + int tokenStartColumn; + int tokenStartLine; + int guessing; + /** What file (if known) caused the problem? 
*/ + ANTLR_USE_NAMESPACE(std)string filename; + InputBuffer& getInput(); +private: + InputBuffer* input; + bool inputResponsible; + /* NOTE(review): inputResponsible presumably records whether this object owns the buffer behind input and must release it in the destructor - confirm in the implementation file. */ + // we don't want these: + LexerInputState(const LexerInputState&); + LexerInputState& operator=(const LexerInputState&); +}; + +typedef RefCount<LexerInputState> LexerSharedInputState; +/** Returns the buffer behind the stored input pointer; the pointer is dereferenced without a null check. */ +inline InputBuffer& LexerInputState::getInput() +{ + return *input; +} + +ANTLR_END_NAMESPACE + +#endif //INC_LexerSharedInputState_hpp__ diff --git a/poxml/antlr/antlr/Makefile.am b/poxml/antlr/antlr/Makefile.am new file mode 100644 index 00000000..bafa1347 --- /dev/null +++ b/poxml/antlr/antlr/Makefile.am @@ -0,0 +1,45 @@ +noinst_HEADERS = \ + ANTLRException.hpp \ + AST.hpp \ + ASTArray.hpp \ + ASTFactory.hpp \ + ASTNULLType.hpp \ + ASTPair.hpp \ + ASTRefCount.hpp \ + BaseAST.hpp \ + BitSet.hpp \ + CharBuffer.hpp \ + CharScanner.hpp \ + CharStreamException.hpp \ + CharStreamIOException.hpp \ + CircularQueue.hpp \ + CommonAST.hpp \ + CommonASTWithHiddenTokens.hpp \ + CommonHiddenStreamToken.hpp \ + CommonToken.hpp \ + InputBuffer.hpp \ + LLkParser.hpp \ + LexerSharedInputState.hpp \ + MismatchedCharException.hpp \ + MismatchedTokenException.hpp \ + NoViableAltException.hpp \ + NoViableAltForCharException.hpp \ + Parser.hpp \ + ParserSharedInputState.hpp \ + RecognitionException.hpp \ + RefCount.hpp \ + SemanticException.hpp \ + String.hpp \ + Token.hpp \ + TokenBuffer.hpp \ + TokenStream.hpp \ + TokenStreamBasicFilter.hpp \ + TokenStreamException.hpp \ + TokenStreamHiddenTokenFilter.hpp \ + TokenStreamIOException.hpp \ + TokenStreamRecognitionException.hpp \ + TokenStreamRetryException.hpp \ + TokenStreamSelector.hpp \ + TreeParser.hpp \ + TreeParserSharedInputState.hpp \ + config.hpp diff --git a/poxml/antlr/antlr/MismatchedCharException.hpp b/poxml/antlr/antlr/MismatchedCharException.hpp new file mode 100644 index 00000000..ea923a9d --- /dev/null +++ b/poxml/antlr/antlr/MismatchedCharException.hpp @@ -0,0 +1,127 @@ +#ifndef 
INC_MismatchedCharException_hpp__ +#define INC_MismatchedCharException_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/RecognitionException.hpp" +#include "antlr/BitSet.hpp" +#include "antlr/CharScanner.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) +/** RecognitionException describing a character mismatch: it records the mismatch type, the character found and the expected character, range or set. */ +class MismatchedCharException : public RecognitionException { +public: + // Types of chars (static const ints, or an anonymous enum when NO_STATIC_CONSTS is defined) +#ifndef NO_STATIC_CONSTS + static const int CHAR = 1; + static const int NOT_CHAR = 2; + static const int RANGE = 3; + static const int NOT_RANGE = 4; + static const int SET = 5; + static const int NOT_SET = 6; +#else + enum { + CHAR = 1, + NOT_CHAR = 2, + RANGE = 3, + NOT_RANGE = 4, + SET = 5, + NOT_SET = 6 + }; +#endif + +public: + // One of the above + int mismatchType; + + // what was found on the input stream + int foundChar; + + // For CHAR/NOT_CHAR and RANGE/NOT_RANGE + int expecting; + + // For RANGE/NOT_RANGE (expecting is lower bound of range) + int upper; + + // For SET/NOT_SET + BitSet set; + +protected: + // who knows...they may want to ask scanner questions + CharScanner* scanner; + +public: + MismatchedCharException(); + + // Expected range / not range + MismatchedCharException( + int c, + int lower, + int upper_, + bool matchNot, + CharScanner* scanner_ + ); + + // Expected char / not char + MismatchedCharException( + int c, + int expecting_, + bool matchNot, + CharScanner* scanner_ + ); + + // Expected BitSet / not BitSet + MismatchedCharException( + int c, + BitSet set_, + bool matchNot, + CharScanner* scanner_ + ); + + MismatchedCharException( + const ANTLR_USE_NAMESPACE(std)string& s, + int line + ); + ~MismatchedCharException() throw() {} + + /** + * Returns the error message that happened on the line/col given. + * Copied from toString(). 
+ */ + ANTLR_USE_NAMESPACE(std)string getMessage() const; +}; + +ANTLR_END_NAMESPACE + +#endif //INC_MismatchedCharException_hpp__ diff --git a/poxml/antlr/antlr/MismatchedTokenException.hpp b/poxml/antlr/antlr/MismatchedTokenException.hpp new file mode 100644 index 00000000..ae4a82cd --- /dev/null +++ b/poxml/antlr/antlr/MismatchedTokenException.hpp @@ -0,0 +1,167 @@ +#ifndef INC_MismatchedTokenException_hpp__ +#define INC_MismatchedTokenException_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/RecognitionException.hpp" +#include "antlr/BitSet.hpp" +#include "antlr/Token.hpp" +#include "antlr/AST.hpp" +#include <vector> + +ANTLR_BEGIN_NAMESPACE(antlr) +/** RecognitionException describing a token mismatch, either against a token (token member) or against an AST node while tree walking (node member); what was expected is recorded as a single type, a range or a set. */ +class MismatchedTokenException : public RecognitionException { +private: + // Token names array for formatting + ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string> tokenNames; + +public: + // The token that was encountered + const RefToken token; + // The offending AST node if tree walking + const RefAST node; + + ANTLR_USE_NAMESPACE(std)string tokenText; // taken from node or token object + + // Types of tokens (static const ints, or an anonymous enum when NO_STATIC_CONSTS is defined) +#ifndef NO_STATIC_CONSTS + static const int TOKEN = 1; + static const int NOT_TOKEN = 2; + static const int RANGE = 3; + static const int NOT_RANGE = 4; + static const int SET = 5; + static const int NOT_SET = 6; +#else + enum { + TOKEN = 1, + NOT_TOKEN = 2, + RANGE = 3, + NOT_RANGE = 4, + SET = 5, + NOT_SET = 6 + }; +#endif + +public: + // One of the above + int mismatchType; + + // For TOKEN/NOT_TOKEN and RANGE/NOT_RANGE + int expecting; + + // For RANGE/NOT_RANGE (expecting is lower bound of range) + int upper; + + // For SET/NOT_SET + BitSet set; + + MismatchedTokenException(); + + // Expected range / not range (tree-walking variant: takes a RefAST node) + MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefAST node_, + int lower, + int upper_, + bool matchNot + ); + + // Expected token / not token (tree-walking variant: takes a RefAST node) + MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefAST node_, + int expecting_, + bool matchNot + ); + + // Expected BitSet / not BitSet (tree-walking variant: takes a RefAST node) + 
MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefAST node_, + BitSet set_, + bool matchNot + ); + + // Expected range / not range (token variant: takes a RefToken and a file name) + MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefToken token_, + int lower, + int upper_, + bool matchNot, + const ANTLR_USE_NAMESPACE(std)string& fileName_ + ); + + // Expected token / not token (token variant: takes a RefToken and a file name) + MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefToken token_, + int expecting_, + bool matchNot, + const ANTLR_USE_NAMESPACE(std)string& fileName_ + ); + + // Expected BitSet / not BitSet (token variant: takes a RefToken and a file name) + MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefToken token_, + BitSet set_, + bool matchNot, + const ANTLR_USE_NAMESPACE(std)string& fileName_ + ); + ~MismatchedTokenException() throw() {} + + /** + * @deprecated As of ANTLR 2.7.0 + */ + ANTLR_USE_NAMESPACE(std)string getErrorMessage() const; + + /** + * Returns the error message that happened on the line/col given. + * Copied from toString(). + */ + ANTLR_USE_NAMESPACE(std)string getMessage() const; + +private: + ANTLR_USE_NAMESPACE(std)string tokenName(int tokenType) const; + +public: + ANTLR_USE_NAMESPACE(std)string toString() const; + +}; + +ANTLR_END_NAMESPACE + +#endif //INC_MismatchedTokenException_hpp__ diff --git a/poxml/antlr/antlr/NoViableAltException.hpp b/poxml/antlr/antlr/NoViableAltException.hpp new file mode 100644 index 00000000..f85bcf96 --- /dev/null +++ b/poxml/antlr/antlr/NoViableAltException.hpp @@ -0,0 +1,71 @@ +#ifndef INC_NoViableAltException_hpp__ +#define INC_NoViableAltException_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. 
An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/RecognitionException.hpp" +#include "antlr/Token.hpp" +#include "antlr/AST.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) +/** RecognitionException that carries the token or AST node involved: one constructor takes a RefAST, the other a RefToken plus a file name. */ +class NoViableAltException : public RecognitionException { +public: + const RefToken token; + const RefAST node; // handles parsing and treeparsing + + NoViableAltException(RefAST t); + + NoViableAltException(RefToken t,const ANTLR_USE_NAMESPACE(std)string& fileName_); + ~NoViableAltException() throw() {} + + /** + * @deprecated As of ANTLR 2.7.0 + */ + ANTLR_USE_NAMESPACE(std)string getErrorMessage() const; + + /** + * Returns a clean error message (no line number/column information) + */ + ANTLR_USE_NAMESPACE(std)string getMessage() const; + + 
/** + * Returns a string representation of this exception. + */ + virtual ANTLR_USE_NAMESPACE(std)string toString() const; +}; + +ANTLR_END_NAMESPACE + +#endif //INC_NoViableAltException_hpp__ diff --git a/poxml/antlr/antlr/NoViableAltForCharException.hpp b/poxml/antlr/antlr/NoViableAltForCharException.hpp new file mode 100644 index 00000000..756e9c7f --- /dev/null +++ b/poxml/antlr/antlr/NoViableAltForCharException.hpp @@ -0,0 +1,64 @@ +#ifndef INC_NoViableAltForCharException_hpp__ +#define INC_NoViableAltForCharException_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Institute + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Institute + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/RecognitionException.hpp" +#include "antlr/CharScanner.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) +/** RecognitionException that stores a character in foundChar; it can be constructed from a character plus either a CharScanner or a file name and line number. */ +class NoViableAltForCharException : public RecognitionException { +public: + int foundChar; + + NoViableAltForCharException(int c, CharScanner* scanner); + + NoViableAltForCharException(int c, const ANTLR_USE_NAMESPACE(std)string& fileName_, int line_); + ~NoViableAltForCharException() throw() {} + + /** + * @deprecated As of ANTLR 2.7.0 + */ + virtual ANTLR_USE_NAMESPACE(std)string getErrorMessage() const; + + /** + * Returns a clean error message (no line number/column information) + */ + virtual ANTLR_USE_NAMESPACE(std)string getMessage() const; +}; + +ANTLR_END_NAMESPACE + +#endif //INC_NoViableAltForCharException_hpp__ diff --git a/poxml/antlr/antlr/Parser.hpp b/poxml/antlr/antlr/Parser.hpp new file mode 100644 index 00000000..767953d3 --- /dev/null +++ b/poxml/antlr/antlr/Parser.hpp @@ -0,0 +1,213 @@ +#ifndef INC_Parser_hpp__ +#define INC_Parser_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. 
By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/BitSet.hpp" +#include "antlr/TokenBuffer.hpp" +#include "antlr/RecognitionException.hpp" +#include "antlr/ASTFactory.hpp" +#include "antlr/ParserSharedInputState.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/**A generic ANTLR parser (LL(k) for k>=1) containing a bunch of + * utility routines useful at any lookahead depth. We distinguish between + * the LL(1) and LL(k) parsers because of efficiency. This may not be + * necessary in the near future. + * + * Each parser object contains the state of the parse including a lookahead + * cache (the form of which is determined by the subclass), whether or + * not the parser is in guess mode, where tokens come from, etc... + * + * <p> + * During <b>guess</b> mode, the current lookahead token(s) and token type(s) + * cache must be saved because the token stream may not have been informed + * to save the token (via <tt>mark</tt>) before the <tt>try</tt> block. + * Guessing is started by: + * <ol> + * <li>saving the lookahead cache. 
+ * <li>marking the current position in the TokenBuffer. + * <li>increasing the guessing level. + * </ol> + * + * After guessing, the parser state is restored by: + * <ol> + * <li>restoring the lookahead cache. + * <li>rewinding the TokenBuffer. + * <li>decreasing the guessing level. + * </ol> + * + * @see antlr.Token + * @see antlr.TokenBuffer + * @see antlr.TokenStream + * @see antlr.LL1Parser + * @see antlr.LLkParser + */ + +extern bool DEBUG_PARSER; + +class Parser { +protected: + ParserSharedInputState inputState; + + /** Nesting level of registered handlers */ + // int exceptionLevel; + + /** Table of token type to token names */ + ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string> tokenNames; + /** AST return value for a rule is squirreled away here */ + RefAST returnAST; + /** AST support code; parser and treeparser delegate to this object */ + ASTFactory astFactory; + +// Parser(); + + Parser(TokenBuffer& input_); + Parser(TokenBuffer* input_); + + Parser(const ParserSharedInputState& state); + +public: + virtual ~Parser(); + +protected: + void setTokenNames(const char** tokenNames_); + +public: + /**Get another token object from the token stream */ + virtual void consume()=0; + + /** Consume tokens until one matches the given token */ + void consumeUntil(int tokenType); + + /** Consume tokens until one matches the given token set */ + void consumeUntil(const BitSet& set); + + /** Get the AST return value squirreled away in the parser */ + RefAST getAST(); + + ASTFactory& getASTFactory(); + + ANTLR_USE_NAMESPACE(std)string getFilename() const; + + virtual ParserSharedInputState getInputState() const; + + ANTLR_USE_NAMESPACE(std)string getTokenName(int num) const; + ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string> getTokenNames() const; + + /** Return the token type of the ith token of lookahead where i=1 + * is the current token being examined by the parser (i.e., it + * has not been matched yet). 
+ */ + virtual int LA(int i)=0; + + /**Return the ith token of lookahead */ + virtual RefToken LT(int i)=0; + + // Forwarded to TokenBuffer + virtual int mark(); + + /**Make sure current lookahead symbol matches token type <tt>t</tt>. + * Throw an exception upon mismatch, which is caught by either the + * error handler or by the syntactic predicate. + */ + void match(int t); + + /**Make sure current lookahead symbol matches the given set + * Throw an exception upon mismatch, which is caught by either the + * error handler or by the syntactic predicate. + */ + void match(const BitSet& b); + + void matchNot(int t); + + static void panic(); + + /** Parser error-reporting function can be overridden in subclass */ + virtual void reportError(const RecognitionException& ex); + + /** Parser error-reporting function can be overridden in subclass */ + virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s); + + /** Parser warning-reporting function can be overridden in subclass */ + virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s); + /* NOTE(review): rewind is presumably the counterpart of mark() above and likewise forwarded to the TokenBuffer - confirm in the implementation file. */ + virtual void rewind(int pos); + + /** Set the object used to generate ASTs */ +// void setASTFactory(ASTFactory astFactory_); + + /** Specify the type of node to create during tree building */ + void setASTNodeFactory(ASTFactory::factory_type factory); + + void setFilename(const ANTLR_USE_NAMESPACE(std)string& f); + + void setInputState(ParserSharedInputState state); + + /** Set or change the input token buffer */ +// void setTokenBuffer(TokenBuffer<Token>* t); + + virtual void traceIndent(); + virtual void traceIn(const ANTLR_USE_NAMESPACE(std)string& rname); + virtual void traceOut(const ANTLR_USE_NAMESPACE(std)string& rname); +protected: + int traceDepth; // used to keep track of the indentation for the trace + +protected: + /** Utility class which allows tracing to work even when exceptions are + * thrown. 
+ */ + class Tracer { + private: + Parser* parser; + ANTLR_USE_NAMESPACE(std)string text; + public: + Tracer(Parser* p,const ANTLR_USE_NAMESPACE(std)string& t) + : parser(p), text(t) { parser->traceIn(text); } + ~Tracer() + { parser->traceOut(text); } + private: + Tracer(const Tracer&); // undefined + const Tracer& operator=(const Tracer&); // undefined + }; + /* Copying a Parser is disabled: the copy constructor and assignment below are private and marked undefined. */ +private: + Parser(const Parser&); // undefined + const Parser& operator=(const Parser&); // undefined +}; + +ANTLR_END_NAMESPACE + +#endif //INC_Parser_hpp__ diff --git a/poxml/antlr/antlr/ParserSharedInputState.hpp b/poxml/antlr/antlr/ParserSharedInputState.hpp new file mode 100644 index 00000000..b5599954 --- /dev/null +++ b/poxml/antlr/antlr/ParserSharedInputState.hpp @@ -0,0 +1,42 @@ +#ifndef INC_ParserSharedInputState_hpp__ +#define INC_ParserSharedInputState_hpp__ + +#include "antlr/config.hpp" +#include "antlr/TokenBuffer.hpp" +#include "antlr/RefCount.hpp" +#include <string> + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** This object contains the data associated with an + * input stream of tokens. Multiple parsers + * share a single ParserSharedInputState to parse + * the same stream of tokens. + */ +class ParserInputState { +public: + ParserInputState(TokenBuffer* input_); + ParserInputState(TokenBuffer& input_); + ~ParserInputState(); + +public: + /** Are we guessing (guessing>0)? */ + int guessing; //= 0; + /** What file (if known) caused the problem? 
*/ + ANTLR_USE_NAMESPACE(std)string filename; + TokenBuffer& getInput(); +private: + /** Where to get token objects */ + TokenBuffer* input; + bool inputResponsible; + /* NOTE(review): inputResponsible presumably records whether this object owns the TokenBuffer behind input and must release it in the destructor - confirm in the implementation file. */ + // we don't want these: + ParserInputState(const ParserInputState&); + ParserInputState& operator=(const ParserInputState&); +}; + +typedef RefCount<ParserInputState> ParserSharedInputState; + +ANTLR_END_NAMESPACE + +#endif //INC_ParserSharedInputState_hpp__ diff --git a/poxml/antlr/antlr/RecognitionException.hpp b/poxml/antlr/antlr/RecognitionException.hpp new file mode 100644 index 00000000..c6439111 --- /dev/null +++ b/poxml/antlr/antlr/RecognitionException.hpp @@ -0,0 +1,78 @@ +#ifndef INC_RecognitionException_hpp__ +#define INC_RecognitionException_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
/** Exception type derived from ANTLRException that additionally
 *  carries a source position (file name, line, column).
 */
class RecognitionException : public ANTLRException {
public:
	ANTLR_USE_NAMESPACE(std)string fileName; // not used by treeparsers
	int line;    // not used by treeparsers
	int column;  // not used by treeparsers

	/** Construct without a message or position. */
	RecognitionException();

	/** Construct with message s only. */
	RecognitionException(const ANTLR_USE_NAMESPACE(std)string& s);

	/** Construct with message s, a file name and a line number. */
	RecognitionException(const ANTLR_USE_NAMESPACE(std)string& s,const ANTLR_USE_NAMESPACE(std)string& fileName_,int line);
	/** Construct with message s, a file name, a line and a column number. */
	RecognitionException(const ANTLR_USE_NAMESPACE(std)string& s,const ANTLR_USE_NAMESPACE(std)string& fileName_,int line,int column);
	/** Empty destructor; declared with an empty exception specification. */
	~RecognitionException() throw() {}

	/**
	 * @return the column number that this exception happened on.
	 * @author Shawn P. Vincent (svincent@svincent.com)
	 */
	virtual int getColumn() const;
	/**
	 * @deprecated As of ANTLR 2.7.0
	 */
	virtual ANTLR_USE_NAMESPACE(std)string getErrorMessage() const;
protected:
	/** NOTE(review): presumably formats the file/line prefix used in
	 *  messages -- confirm against the implementation.
	 */
	virtual ANTLR_USE_NAMESPACE(std)string getFileLineString() const;
public:
	/** @return the file name associated with this exception. */
	virtual ANTLR_USE_NAMESPACE(std)string getFilename() const;
	/**
	 * @return the line number that this exception happened on.
	 * @author Shawn P. Vincent (svincent@svincent.com)
	 */
	virtual int getLine() const;
	/** @return a string representation of this exception. */
	virtual ANTLR_USE_NAMESPACE(std)string toString() const;
};
/** Minimal intrusive-free reference-counting handle.
 *
 *  A RefCount<T> either is empty (holds no control block) or shares a
 *  heap-allocated control block with other handles.  The control block
 *  owns the T object and deletes it when the block itself is destroyed,
 *  which happens when the last handle referring to it lets go.
 *
 *  The counter is a plain unsigned int; nothing here synchronizes
 *  access to it.
 */
template<class T>
class RefCount {
private:
	/** Control block: the owned pointer plus the number of handles. */
	struct Ref {
		T* const ptr;
		unsigned int count;

		Ref(T* p) : ptr(p), count(1) {}
		~Ref() { delete ptr; }
		/** Register one more handle; returns this for chaining. */
		Ref* increment() { ++count; return this; }
		/** Drop one handle; true when no handle is left. */
		bool decrement() { return (--count == 0); }
	private:
		// not copyable
		Ref(const Ref&);
		Ref& operator=(const Ref&);
	};

	Ref* ref;

	/** Take a share of block (may be null); returns the block. */
	static Ref* acquire(Ref* block)
	{
		if (block == 0)
			return 0;
		return block->increment();
	}

	/** Give up a share of block (may be null), destroying it if last. */
	static void release(Ref* block)
	{
		if (block != 0 && block->decrement())
			delete block;
	}

public:
	/** Wrap p; a null p yields an empty handle. */
	explicit RefCount(T* p=0)
	: ref(0)
	{
		if (p)
			ref = new Ref(p);
	}

	/** Share whatever other refers to. */
	RefCount(const RefCount<T>& other)
	: ref(acquire(other.ref))
	{
	}

	~RefCount()
	{
		release(ref);
	}

	/** Rebind to other's object.  The new share is taken before the old
	 *  one is dropped, so self-assignment is harmless.
	 */
	RefCount<T>& operator=(const RefCount<T>& other)
	{
		Ref* const incoming = acquire(other.ref);
		release(ref);
		ref = incoming;
		return *this;
	}

	/** Raw pointer to the shared object, or null for an empty handle. */
	T* get() const
	{
		if (ref == 0)
			return 0;
		return ref->ptr;
	}

	operator T* () const
	{
		return get();
	}

	T* operator->() const
	{
		return get();
	}
};
/** A RecognitionException subtype; it adds no state of its own and
 *  only forwards its constructor arguments to the base class.
 */
class SemanticException : public RecognitionException {
public:
	/** Construct with message s only. */
	SemanticException(const ANTLR_USE_NAMESPACE(std)string& s)
	: RecognitionException(s) {}
	/** Construct with message s plus the file name and line to report. */
	SemanticException(const ANTLR_USE_NAMESPACE(std)string& s,const ANTLR_USE_NAMESPACE(std)string& fileName_,int line_)
	: RecognitionException(s,fileName_,line_) {}
	/** Empty destructor; declared with an empty exception specification. */
	~SemanticException() throw() {}
};
+#ifndef INC_String_hpp__ +#define INC_String_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
/** Combine a string with an int, yielding a new string.
 *  NOTE(review): presumably appends the decimal text of rhs to lhs --
 *  the definition is not visible here, confirm in String.cpp.
 */
ANTLR_USE_NAMESPACE(std)string operator+(const ANTLR_USE_NAMESPACE(std)string& lhs,int rhs);

/** Produce a string for the character code ch.
 *  NOTE(review): presumably a printable name used in error messages --
 *  confirm in String.cpp.
 */
ANTLR_USE_NAMESPACE(std)string charName(int ch);
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/RefCount.hpp" +#include <string> + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** A token is minimally a token type. Subclasses can add the text matched + * for the token and line info. + */ + +class Token; +typedef RefCount<Token> RefToken; + +class Token { +public: + // constants +#ifndef NO_STATIC_CONSTS + static const int MIN_USER_TYPE = 4; + static const int NULL_TREE_LOOKAHEAD = 3; + static const int INVALID_TYPE = 0; + static const int EOF_TYPE = 1; + static const int SKIP = -1; +#else + enum { + MIN_USER_TYPE = 4, + NULL_TREE_LOOKAHEAD = 3, + INVALID_TYPE = 0, + EOF_TYPE = 1, + SKIP = -1 + }; +#endif + + // each Token has at least a token type + int type; //=INVALID_TYPE; + +public: + // the illegal token object + static RefToken badToken; // = new Token(INVALID_TYPE, "<no text>"); + + Token(); + Token(int t); + Token(int t, const ANTLR_USE_NAMESPACE(std)string& txt); + + virtual int getColumn() const; + virtual int getLine() const; + virtual ANTLR_USE_NAMESPACE(std)string getText() const; + virtual int getType() const; + + virtual void setColumn(int c); + + virtual void setLine(int l); + virtual void setText(const ANTLR_USE_NAMESPACE(std)string& t); + virtual void setType(int t); + + virtual ANTLR_USE_NAMESPACE(std)string toString() const; + + virtual ~Token(); +private: + Token(const Token&); + const Token& operator=(const Token&); +}; + +#ifdef NEEDS_OPERATOR_LESS_THAN +inline operator<(RefToken l,RefToken r); //{return true;} +#endif + +extern RefToken nullToken; + +ANTLR_END_NAMESPACE + +#endif //INC_Token_hpp__ diff --git a/poxml/antlr/antlr/TokenBuffer.hpp b/poxml/antlr/antlr/TokenBuffer.hpp new file 
mode 100644 index 00000000..b7c1b25f --- /dev/null +++ b/poxml/antlr/antlr/TokenBuffer.hpp @@ -0,0 +1,141 @@ +#ifndef INC_TokenBuffer_hpp__ +#define INC_TokenBuffer_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/TokenStream.hpp" +#include "antlr/CircularQueue.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/**A Stream of Token objects fed to the parser from a TokenStream that can + * be rewound via mark()/rewind() methods. + * <p> + * A dynamic array is used to buffer up all the input tokens. 
Normally, + * "k" tokens are stored in the buffer. More tokens may be stored during + * guess mode (testing syntactic predicate), or when LT(i>k) is referenced. + * Consumption of tokens is deferred. In other words, reading the next + * token is not done by conume(), but deferred until needed by LA or LT. + * <p> + * + * @see antlr.Token + * @see antlr.TokenStream + * @see antlr.TokenQueue + */ +class TokenBuffer { +protected: + // Token source + TokenStream& input; + +private: + // Number of active markers + int nMarkers; + + // Additional offset used when markers are active + int markerOffset; + + // Number of calls to consume() since last LA() or LT() call + int numToConsume; + + // Circular queue + CircularQueue<RefToken> queue; + +public: + /** Create a token buffer */ + TokenBuffer(TokenStream& input_); + + /** Mark another token for deferred consumption */ + void consume(); + +private: + /** Ensure that the token buffer is sufficiently full */ + void fill(int amount); + +public: + /** Get a lookahead token value */ + int LA(int i); + + /** Get a lookahead token */ + RefToken LT(int i); + + /**Return an integer marker that can be used to rewind the buffer to + * its current state. + */ + int mark(); + + /**Rewind the token buffer to a marker. + * @param mark Marker returned previously from mark() + */ + void rewind(int mark); + +private: + /** Sync up deferred consumption */ + void syncConsume(); + +private: + TokenBuffer(const TokenBuffer& other); + const TokenBuffer& operator=(const TokenBuffer& other); +public: +// virtual ~TokenBuffer() {} +}; + +/** Sync up deferred consumption */ +inline void TokenBuffer::syncConsume() +{ +#ifdef OLD_CODE + while (numToConsume > 0) { + if (nMarkers > 0) { + // guess mode -- leave leading tokens and bump offset. 
+ markerOffset++; + } else { + // normal mode -- remove first token + queue.removeFirst(); + } + numToConsume--; + } +#endif + + if (numToConsume > 0) { + if (nMarkers > 0) { + markerOffset += numToConsume; + } else { + queue.removeItems( numToConsume ); + } + numToConsume = 0; + } +} + +ANTLR_END_NAMESPACE + +#endif //INC_TokenBuffer_hpp__ diff --git a/poxml/antlr/antlr/TokenStream.hpp b/poxml/antlr/antlr/TokenStream.hpp new file mode 100644 index 00000000..e8436419 --- /dev/null +++ b/poxml/antlr/antlr/TokenStream.hpp @@ -0,0 +1,54 @@ +#ifndef INC_TokenStream_hpp__ +#define INC_TokenStream_hpp__ + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
/** Abstract source of tokens: any object implementing nextToken()
 *  can act as a stream of tokens.
 */
class TokenStream {
public:
	/** Produce the next token of the stream; must be implemented by
	 *  every concrete stream.
	 */
	virtual RefToken nextToken()=0;
	/** Virtual so that streams can be destroyed through a TokenStream pointer. */
	virtual ~TokenStream() {}
};
/** This object is a TokenStream that passes through all
 *  tokens except for those that you tell it to discard.
 *  There is no buffering of the tokens.
 */
class TokenStreamBasicFilter : public TokenStream {
	/** The set of token types to discard */
protected:
	BitSet discardMask;

	/** The input stream */
protected:
	TokenStream* input;

public:
	/** Create a filter reading from input_. */
	TokenStreamBasicFilter(TokenStream& input_);

	/** Register a single token type to be discarded. */
	void discard(int ttype);

	/** Register a set of token types to be discarded. */
	void discard(const BitSet& mask);

	/** TokenStream interface: deliver the next token. */
	RefToken nextToken();
};
/**This object filters a token stream coming from a lexer
 * or another TokenStream so that only certain token channels
 * get transmitted to the parser.
 *
 * Any of the channels can be filtered off as "hidden" channels whose
 * tokens can be accessed from the parser.
 */
class TokenStreamHiddenTokenFilter : public TokenStreamBasicFilter {
	// protected BitSet discardMask;   (inherited from TokenStreamBasicFilter)
protected:
	/** The set of token types treated as hidden */
	BitSet hideMask;

private:
	/** NOTE(review): presumably the one-token lookahead described at
	 *  nextToken() -- confirm in the implementation.
	 */
	RefToken nextMonitoredToken;

protected:
	/** track tail of hidden list emanating from previous
	 *  monitored token
	 */
	RefToken lastHiddenToken;

	/** See getInitialHiddenToken(). */
	RefToken firstHidden; // = null;

public:
	/** Create a filter reading from input. */
	TokenStreamHiddenTokenFilter(TokenStream& input);

protected:
	void consume();

private:
	void consumeFirst();

public:
	BitSet getDiscardMask() const;

	/** Return a ptr to the hidden token appearing immediately after
	 *  token t in the input stream.
	 */
	RefToken getHiddenAfter(RefToken t);

	/** Return a ptr to the hidden token appearing immediately before
	 *  token t in the input stream.
	 */
	RefToken getHiddenBefore(RefToken t);

	BitSet getHideMask() const;

	/** Return the first hidden token if one appears
	 *  before any monitored token.
	 */
	RefToken getInitialHiddenToken();

	/** Mark a single token type as hidden. */
	void hide(int m);

	/** Mark a set of token types as hidden. */
	void hide(const BitSet& mask);

protected:
	RefToken LA(int i);

public:
	/** Return the next monitored token.
	 *  Test the token following the monitored token.
	 *  If following is another monitored token, save it
	 *  for the next invocation of nextToken (like a single
	 *  lookahead token) and return it then.
	 *  If following is unmonitored, nondiscarded (hidden)
	 *  channel token, add it to the monitored token.
	 *
	 *  Note: EOF must be a monitored Token.
	 */
	RefToken nextToken();
};
/** A TokenStreamException subtype that carries no message and no
 *  extra state.
 *  NOTE(review): presumably thrown to make a token stream consumer
 *  ask for the next token again -- confirm at the throw sites.
 */
class TokenStreamRetryException : public TokenStreamException {
public:
	TokenStreamRetryException() {}
	/** Empty destructor; declared with an empty exception specification. */
	~TokenStreamRetryException() throw() {}
};
/** A token stream MUX (multiplexor) knows about n token streams
 *  and can multiplex them onto the same channel for use by token
 *  stream consumer like a parser.  This is a way to have multiple
 *  lexers break up the same input stream for a single parser.
 *  Or, you can have multiple instances of the same lexer handle
 *  multiple input streams; this works great for includes.
 */
class TokenStreamSelector : public TokenStream {
protected:
	/** The set of inputs to the MUX */
	// With OS_NO_ALLOCATOR the comparator / underlying container are
	// spelled out explicitly instead of relying on default template
	// arguments.
#ifdef OS_NO_ALLOCATOR
	typedef ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string> lessp;
	typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,TokenStream*,lessp> inputStreamNames_coll;
#else
	typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,TokenStream*> inputStreamNames_coll;
#endif
	inputStreamNames_coll inputStreamNames;

	/** The currently-selected token stream input */
	TokenStream* input;

	/** Used to track stack of input streams */
#ifdef OS_NO_ALLOCATOR
	typedef ANTLR_USE_NAMESPACE(std)stack<TokenStream*, ANTLR_USE_NAMESPACE(std)deque<TokenStream*> > streamStack_coll;
#else
	typedef ANTLR_USE_NAMESPACE(std)stack<TokenStream*> streamStack_coll;
#endif
	streamStack_coll streamStack;

public:
	TokenStreamSelector();
	~TokenStreamSelector();

	/** Register stream under the name key. */
	void addInputStream(TokenStream* stream, const ANTLR_USE_NAMESPACE(std)string& key);

	/** Return the stream from which tokens are being pulled at
	 *  the moment.
	 */
	TokenStream* getCurrentStream() const;

	/** Look up a registered stream by name. */
	TokenStream* getStream(const ANTLR_USE_NAMESPACE(std)string& sname) const;

	/** TokenStream interface: deliver the next token. */
	RefToken nextToken();

	/** Counterpart of push().
	 *  NOTE(review): presumably restores and returns the stream saved
	 *  by the matching push() -- confirm in the implementation.
	 */
	TokenStream* pop();

	/** Switch to stream.
	 *  NOTE(review): presumably saves the current stream on
	 *  streamStack first -- confirm in the implementation.
	 */
	void push(TokenStream* stream);

	/** Same as push(TokenStream*), with the stream given by name. */
	void push(const ANTLR_USE_NAMESPACE(std)string& sname);

	/** Abort recognition of current Token and try again.
	 *  A stream can push a new stream (for include files
	 *  for example, and then retry(), which will cause
	 *  the current stream to abort back to this.nextToken().
	 *  this.nextToken() then asks for a token from the
	 *  current stream, which is the new "substream."
	 */
	void retry();

	/** Set the stream without pushing old stream */
	void select(TokenStream* stream);

	/** Same as select(TokenStream*), with the stream given by name. */
	void select(const ANTLR_USE_NAMESPACE(std)string& sname);
};
 * <p>
 * The ANTLR gang:
 * @version ANTLR 2.6.0 MageLang Insitute, 1999
 * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
 * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
 * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a>
 */
#include "antlr/config.hpp"
#include "antlr/AST.hpp"
#include "antlr/ASTFactory.hpp"
#include "antlr/BitSet.hpp"
#include "antlr/RecognitionException.hpp"
#include "antlr/TreeParserSharedInputState.hpp"

ANTLR_BEGIN_NAMESPACE(antlr)

/** Declares the state and helper operations of a parser that walks an AST:
 *  matching nodes against token types, error/warning reporting, AST
 *  construction through an ASTFactory, and rule tracing.
 *  Copying is disabled (copy constructor and assignment are private).
 */
class TreeParser {
public:
    TreeParser();
    TreeParser(const TreeParserSharedInputState& state);
    virtual ~TreeParser();

protected:
    void setTokenNames(const char** tokenNames_);

public:
    /** The AST Null object; the parsing cursor is set to this when
     *  it is found to be null.  This way, we can test the
     *  token type of a node without having to have tests for null
     *  everywhere.
     */
    static RefAST ASTNULL;

protected:
    /** Where did this rule leave off parsing; avoids a return parameter */
    RefAST _retTree;

    /** guessing nesting level; guessing==0 implies not guessing */
    // int guessing; // = 0;

    /** Nesting level of registered handlers */
    // int exceptionLevel; // = 0;

    /** Input state object; it is a reference-counted handle
     *  (see TreeParserSharedInputState), so it can be shared.
     */
    TreeParserSharedInputState inputState;

    /** Table of token type to token names */
    ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string> tokenNames;

    /** AST return value for a rule is squirreled away here */
    RefAST returnAST;

    /** AST support code; parser and treeparser delegate to this object */
    ASTFactory astFactory; // = new ASTFactory();

    /** Used to keep track of indent depth with -traceTreeParser */
    int traceDepth;

public:
    /** Get the AST return value squirreled away in the parser */
    RefAST getAST() const {
        return returnAST;
    }

    ANTLR_USE_NAMESPACE(std)string getTokenName(int num) const;
    ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string> getTokenNames() const;

protected:
    void match(RefAST t, int ttype);

public:
    /** Make sure current lookahead symbol matches the given set.
     *  Throw an exception upon mismatch, which is caught by either the
     *  error handler or by the syntactic predicate.
     */
    void match(RefAST t, const BitSet& b);

protected:
    void matchNot(RefAST t, int ttype);

public:
    static void panic();

    /** Parser error-reporting function can be overridden in subclass */
    virtual void reportError(const RecognitionException& ex);

    /** Parser error-reporting function can be overridden in subclass */
    virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);

    /** Parser warning-reporting function can be overridden in subclass */
    virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);

    /** Specify an object with support code (shared by
     *  Parser and TreeParser).  Normally, the programmer
     *  does not play with this, using setASTNodeType instead.
     *  (Currently disabled: the declaration below is commented out.)
     */
//  void setASTFactory(ASTFactory f);

    /** Specify the type of node to create during tree building */
    void setASTNodeFactory(ASTFactory::factory_type factory);

protected:
    /** Utility class which allows tracing to work even when exceptions are
     *  thrown: the constructor calls traceIn() on the parser and the
     *  destructor calls traceOut() with the same rule text and tree.
     */
    class Tracer {
    private:
        TreeParser* parser;
        ANTLR_USE_NAMESPACE(std)string text;
        RefAST tree;
    public:
        Tracer(TreeParser* p,const ANTLR_USE_NAMESPACE(std)string& t, RefAST a)
            : parser(p), text(t), tree(a) { parser->traceIn(text,tree); }
        ~Tracer()
            { parser->traceOut(text,tree); }
    private:
        Tracer(const Tracer&);                  // undefined
        const Tracer& operator=(const Tracer&); // undefined
    };

public:
    void traceIndent();
    void traceIn(const ANTLR_USE_NAMESPACE(std)string& rname, RefAST t);
    void traceOut(const ANTLR_USE_NAMESPACE(std)string& rname, RefAST t);

private:
    // Declared private so that client code cannot copy a TreeParser.
    TreeParser(const TreeParser& other);
    TreeParser& operator=(const TreeParser& other);
};

ANTLR_END_NAMESPACE

#endif //INC_TreeParser_hpp__
diff --git a/poxml/antlr/antlr/TreeParserSharedInputState.hpp b/poxml/antlr/antlr/TreeParserSharedInputState.hpp new file mode 100644 index 00000000..8f7b0922 --- /dev/null +++ b/poxml/antlr/antlr/TreeParserSharedInputState.hpp @@ -0,0 +1,34 @@
#ifndef INC_TreeParserSharedInputState_hpp__
#define INC_TreeParserSharedInputState_hpp__

#include "antlr/config.hpp"
#include "antlr/RefCount.hpp"

ANTLR_BEGIN_NAMESPACE(antlr)

/** This object contains the data associated with an
 *  input AST.  Multiple parsers
 *  share a single TreeParserSharedInputState to parse
 *  the same tree or to have the parser walk multiple
 *  trees.
 */
class TreeParserInputState {
public:
    TreeParserInputState();
    ~TreeParserInputState();

public:
    /** Are we guessing (guessing>0)? */
    int guessing; //= 0;

private:
    // we don't want these: copying is disabled by declaring them private
    TreeParserInputState(const TreeParserInputState&);
    TreeParserInputState& operator=(const TreeParserInputState&);
};

/** Reference-counted handle through which the input state is shared. */
typedef RefCount<TreeParserInputState> TreeParserSharedInputState;

ANTLR_END_NAMESPACE

#endif //INC_TreeParserSharedInputState_hpp__
diff --git a/poxml/antlr/antlr/config.hpp b/poxml/antlr/antlr/config.hpp new file mode 100644 index 00000000..8ac94a3a --- /dev/null +++ b/poxml/antlr/antlr/config.hpp @@ -0,0 +1,168 @@
#ifndef INC_config_hpp__
#define INC_config_hpp__

/**
 * <b>SOFTWARE RIGHTS</b>
 * <p>
 * ANTLR 2.6.0 MageLang Insitute, 1999
 * <p>
 * We reserve no legal rights to the ANTLR--it is fully in the
 * public domain. An individual or company may do whatever
 * they wish with source code distributed with ANTLR or the
 * code generated by ANTLR, including the incorporation of
 * ANTLR, or its output, into commerical software.
 * <p>
 * We encourage users to develop software with ANTLR. However,
 * we do ask that credit is given to us for developing
 * ANTLR. By "credit", we mean that if you use ANTLR or
 * incorporate any source code into one of your programs
 * (commercial product, research project, or otherwise) that
 * you acknowledge this fact somewhere in the documentation,
 * research report, etc... If you like ANTLR and have
 * developed a nice tool with the output, please mention that
 * you developed it using ANTLR. In addition, we ask that the
 * headers remain intact in our source code. As long as these
 * guidelines are kept, we expect to continue enhancing this
 * system and expect to make other tools available as they are
 * completed.
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +/* + * Just a simple configuration file to differentiate between the + * various compilers used. + */ + +/* + Some compilers do not accept namespaces std:: for example. + In this case, just define #define ANTLR_USE_NAMESPACE(_x_). + + See SunWorkShop 4.2 for example. + */ +#define ANTLR_USE_NAMESPACE(_x_) _x_:: +#define ANTLR_USING_NAMESPACE(_x_) using namespace _x_; +#define ANTLR_BEGIN_NAMESPACE(_x_) namespace _x_ { +#define ANTLR_END_NAMESPACE } +#define ANTLR_C_USING(_x_) + +#if defined(_MSC_VER) && !defined(__ICL) // Microsoft Visual C++ + +// This warning really gets on my nerves. +// It's the one about symbol longer than 256 chars, and it happens +// all the time with STL. +#pragma warning( disable : 4786 ) + +// Now, some defines for shortcomings in the MS compiler: + +// Not allowed to put 'static const int XXX=20;' in a class definition +#define NO_STATIC_CONSTS +// Using vector<XXX> requires operator<(X,X) to be defined +#define NEEDS_OPERATOR_LESS_THAN +// No strcasecmp in the C library (so use stricmp instead) +// - Anyone know which is in which standard? 
+#define NO_STRCASECMP + +#endif + +#if defined(__ICL) +#define NO_STRCASECMP +#endif + +// +// SunPro Compiler (Using OBJECTSPACE STL) +// +#ifdef __SUNPRO_CC + +#if (__SUNPRO_CC >= 0x500) + +#define NEEDS_OPERATOR_LESS_THAN +#define NO_TEMPLATE_PARTS + +#else + +#undef namespace +#define namespace + + +#if (__SUNPRO_CC == 0x420) + +/* This code is specif to SunWspro Compiler 4.2, and will compile with + the objectspace 2.1 toolkit for Solaris2.6 */ +#define HAS_NOT_CASSERT_H +#define HAS_NOT_CSTRING_H +#define HAS_NOT_CCTYPE_H +#define HAS_NOT_CSTDIO_H +#define HAS_OSTREAM_H + +/* #define OS_SOLARIS_2_6 +#define OS_NO_WSTRING +#define OS_NO_ALLOCATORS +#define OS_MULTI_THREADED +#define OS_SOLARIS_NATIVE +#define OS_REALTIME +#define __OSVERSION__=5 +#define SVR4 +*/ + +// ObjectSpace + some specific templates constructions with stl. +/* #define OS_NO_ALLOCATOR */ + +// This great compiler does not have the namespace feature. +#undef ANTLR_USE_NAMESPACE +#define ANTLR_USE_NAMESPACE(_x_) +#undef ANTLR_USING_NAMESPACE +#define ANTLR_USING_NAMESPACE(_x_) +#undef ANTLR_BEGIN_NAMESPACE +#define ANTLR_BEGIN_NAMESPACE(_x_) +#undef ANTLR_END_NAMESPACE +#define ANTLR_END_NAMESPACE + +#endif + +#undef explicit +#define explicit + +#define exception os_exception +#define bad_exception os_bad_exception + +// Not allowed to put 'static const int XXX=20;' in a class definition +#define NO_STATIC_CONSTS +// Using vector<XXX> requires operator<(X,X) to be defined +#define NEEDS_OPERATOR_LESS_THAN + +#endif + +#endif + +// +// Inprise C++ Builder 3.0 +// +#ifdef __BCPLUSPLUS__ + +#define NO_TEMPLATE_PARTS +#define NO_STRCASECMP +#endif + +#ifdef _AIX +#include <strings.h> +#endif + +// +// Metrowerks Codewarrior +// +#ifdef __MWERKS__ +#if (__MWERKS__ <= 0x2201) +#define NO_TEMPLATE_PARTS +#define ANTLR_REALLY_NO_STRCASECMP +#endif + +#undef ANTLR_C_USING +#define ANTLR_C_USING(_x_) using std:: ## _x_; +#endif + +#endif //INC_config_hpp__ diff --git a/poxml/antlr/configure.in 
b/poxml/antlr/configure.in new file mode 100644 index 00000000..66f8cf15 --- /dev/null +++ b/poxml/antlr/configure.in @@ -0,0 +1,22 @@ +AC_INIT(src/Parser.cpp) + +PACKAGE=libantlr +VERSION="2.7.1" +LIBANTLR_SO_VERSION=0:0:0 + +AM_INIT_AUTOMAKE($PACKAGE, $VERSION) + +AM_DISABLE_SHARED +AM_PROG_LIBTOOL +AC_PROG_CXX +AC_PROG_CXXCPP +AC_PROG_RANLIB + +test "$ac_cv_prog_gxx" = 'yes' && CXXFLAGS="$CXXFLAGS -W -Wall -pipe" + +AC_SUBST(LIBANTLR_SO_VERSION) +AC_SUBST(LIBTOOL_DEPS) + +AC_OUTPUT(Makefile \ + src/Makefile \ + antlr/Makefile) diff --git a/poxml/antlr/src/ANTLRException.cpp b/poxml/antlr/src/ANTLRException.cpp new file mode 100644 index 00000000..42632e71 --- /dev/null +++ b/poxml/antlr/src/ANTLRException.cpp @@ -0,0 +1,57 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/ANTLRException.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +ANTLRException::ANTLRException() : text("") +{} + +ANTLRException::ANTLRException(const ANTLR_USE_NAMESPACE(std)string& s) +: text(s) +{} + +ANTLRException::~ANTLRException() throw() +{} + +ANTLR_USE_NAMESPACE(std)string ANTLRException::toString() const +{ return text; } + +ANTLR_USE_NAMESPACE(std)string ANTLRException::getMessage() const +{ return text; } + +const char* ANTLRException::what() const throw() +{ return text.c_str(); } + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/ASTFactory.cpp b/poxml/antlr/src/ASTFactory.cpp new file mode 100644 index 00000000..e44386f7 --- /dev/null +++ b/poxml/antlr/src/ASTFactory.cpp @@ -0,0 +1,218 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. 
In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/ASTFactory.hpp" +#include "antlr/CommonAST.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** AST Support code shared by TreeParser and Parser. + * We use delegation to share code (and have only one + * bit of code to maintain) rather than subclassing + * or superclassing (forces AST support code to be + * loaded even when you don't want to do AST stuff). + * + * Typically, setASTNodeType is used to specify the + * type of node to create, but you can override + * create to make heterogeneous nodes etc... + */ + +ASTFactory::ASTFactory() : nodeFactory(&CommonAST::factory) +{ +} + +/** Add a child to the current AST */ +void ASTFactory::addASTChild(ASTPair& currentAST, RefAST child) +{ + if (child) { + if (!currentAST.root) { + // Make new child the current root + currentAST.root = child; + } + else { + if (!currentAST.child) { + // Add new child to current root + currentAST.root->setFirstChild(child); + } + else { + currentAST.child->setNextSibling(child); + } + } + // Make new child the current child + currentAST.child = child; + currentAST.advanceChildToEnd(); + } +} +/** Create a new empty AST node; if the user did not specify + * an AST node type, then create a default one: CommonAST. 
+ */ +RefAST ASTFactory::create() +{ + RefAST node = nodeFactory(); + node->setType(Token::INVALID_TYPE); + return node; +} + +RefAST ASTFactory::create(int type) +{ + RefAST t = nodeFactory(); + t->initialize(type,""); + return t; +} + +RefAST ASTFactory::create(int type, const ANTLR_USE_NAMESPACE(std)string& txt) +{ + RefAST t = nodeFactory(); + t->initialize(type,txt); + return t; +} + +/** Create a new empty AST node; if the user did not specify + * an AST node type, then create a default one: CommonAST. + */ +RefAST ASTFactory::create(RefAST tr) +{ + if (!tr) + return nullAST; + + RefAST t = nodeFactory(); + t->initialize(tr); + return t; +} + +RefAST ASTFactory::create(RefToken tok) +{ + RefAST t = nodeFactory(); + t->initialize(tok); + return t; +} +/** Copy a single node. clone() is not used because + * we want to return an AST not a plain object...a type + * safety issue. Further, we want to have all AST node + * creation go through the factory so creation can be + * tracked. Returns null if t is null. + */ +RefAST ASTFactory::dup(RefAST t) +{ + return create(t); // if t==null, create returns null +} + +/** Duplicate tree including siblings of root. */ +RefAST ASTFactory::dupList(RefAST t) +{ + RefAST result = dupTree(t); // if t == null, then result==null + RefAST nt = result; + while (t) { // for each sibling of the root + t = t->getNextSibling(); + nt->setNextSibling(dupTree(t)); // dup each subtree, building new tree + nt = nt->getNextSibling(); + } + return result; +} +/**Duplicate a tree, assuming this is a root node of a tree-- + * duplicate that node and what's below; ignore siblings of root node. + */ +RefAST ASTFactory::dupTree(RefAST t) +{ + RefAST result = dup(t); // make copy of root + // copy all children of root. + if (t) { + result->setFirstChild( dupList(t->getFirstChild()) ); + } + return result; +} +/** Make a tree from a list of nodes. The first element in the + * array is the root. 
If the root is null, then the tree is + * a simple list not a tree. Handles null children nodes correctly. + * For example, build(a, b, null, c) yields tree (a b c). build(null,a,b) + * yields tree (nil a b). + */ +RefAST ASTFactory::make(ANTLR_USE_NAMESPACE(std)vector<RefAST> nodes) +{ + if ( nodes.size()==0 ) + return RefAST(nullASTptr); + RefAST root = nodes[0]; + RefAST tail = RefAST(nullASTptr); + if (root) { + root->setFirstChild(RefAST(nullASTptr)); // don't leave any old pointers set + } + // link in children; + for (unsigned int i=1; i<nodes.size(); i++) { + if ( !nodes[i] ) continue; // ignore null nodes + if ( !root ) { + // Set the root and set it up for a flat list + root = tail = nodes[i]; + } + else if ( !tail ) { + root->setFirstChild(nodes[i]); + tail = root->getFirstChild(); + } + else { + tail->setNextSibling(nodes[i]); + tail = tail->getNextSibling(); + } + // Chase tail to last sibling + while (tail->getNextSibling()) { + tail = tail->getNextSibling(); + } + } + return root; +} +/** Make a tree from a list of nodes, where the nodes are contained + * in an ASTArray object + */ +RefAST ASTFactory::make(ASTArray* nodes) +{ + RefAST ret = make(nodes->array); + delete nodes; + return ret; +} +/** Make an AST the root of current AST */ +void ASTFactory::makeASTRoot(ASTPair& currentAST, RefAST root) +{ + if (root) { + // Add the current root as a child of new root + root->addChild(currentAST.root); + // The new current child is the last sibling of the old root + currentAST.child = currentAST.root; + currentAST.advanceChildToEnd(); + // Set the new root + currentAST.root = root; + } +} +void ASTFactory::setASTNodeFactory(factory_type factory) +{ + nodeFactory = factory; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/ASTRefCount.cpp b/poxml/antlr/src/ASTRefCount.cpp new file mode 100644 index 00000000..1da98306 --- /dev/null +++ b/poxml/antlr/src/ASTRefCount.cpp @@ -0,0 +1,74 @@ +#include "antlr/ASTRefCount.hpp" +#include "antlr/AST.hpp" + 
+ANTLR_BEGIN_NAMESPACE(antlr) + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +ASTRef::ASTRef(AST* p) + : ptr(p), count(1) +{ + if (p && !p->ref) + p->ref = this; +} + +ASTRef::~ASTRef() +{ + delete ptr; +} + +ASTRef* ASTRef::increment() +{ + ++count; + return this; +} + +bool ASTRef::decrement() +{ + return (--count==0); +} + +ASTRef* ASTRef::getRef(const AST* p) +{ + if (p) { + AST* pp = const_cast<AST*>(p); + if (pp->ref) + return pp->ref->increment(); + else + return new ASTRef(pp); + } else + return 0; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/BaseAST.cpp b/poxml/antlr/src/BaseAST.cpp new file mode 100644 index 00000000..4080e0e8 --- /dev/null +++ b/poxml/antlr/src/BaseAST.cpp @@ -0,0 +1,320 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. 
As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/BaseAST.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +//bool BaseAST::verboseStringConversion; +//ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string> BaseAST::tokenNames; + +void BaseAST::addChild(RefAST c) +{ + if (!c) + return; + RefBaseAST tmp=down; + if (tmp) { + while (tmp->right) + tmp=tmp->right; + tmp->right=c; + } else { + down=c; + } +} + +void BaseAST::doWorkForFindAll( + ANTLR_USE_NAMESPACE(std)vector<RefAST>& v, + RefAST target,bool partialMatch) +{ + // Start walking sibling lists, looking for matches. + for (RefAST sibling=this; + sibling; + sibling=sibling->getNextSibling()) + { + if ( (partialMatch && sibling->equalsTreePartial(target)) || + (!partialMatch && sibling->equalsTree(target)) ) { + v.push_back(sibling); + } + // regardless of match or not, check any children for matches + if ( sibling->getFirstChild() ) { + RefBaseAST(sibling->getFirstChild())->doWorkForFindAll(v, target, partialMatch); + } + } + +} + +/** Is node t equal to this in terms of token type and text? */ +bool BaseAST::equals(RefAST t) const +{ + if (!t) + return false; + return (getText() == t->getText()) && (getType() == t->getType()); +} + +/** Is t an exact structural and equals() match of this tree. The + * 'this' reference is considered the start of a sibling list. + */ +bool BaseAST::equalsList(RefAST t) const +{ + // the empty tree is not a match of any non-null tree. + if (!t) + return false; + + // Otherwise, start walking sibling lists. First mismatch, return false. 
+ RefAST sibling=this; + for (;sibling && t; + sibling=sibling->getNextSibling(), t=t->getNextSibling()) { + // as a quick optimization, check roots first. + if (!sibling->equals(t)) + return false; + // if roots match, do full list match test on children. + if (sibling->getFirstChild()) { + if (!sibling->getFirstChild()->equalsList(t->getFirstChild())) + return false; + } + // sibling has no kids, make sure t doesn't either + else if (t->getFirstChild()) + return false; + } + + if (!sibling && !t) + return true; + + // one sibling list has more than the other + return false; +} + +/** Is 'sub' a subtree of this list? + * The siblings of the root are NOT ignored. + */ +bool BaseAST::equalsListPartial(RefAST sub) const +{ + // the empty tree is always a subset of any tree. + if (!sub) + return true; + + // Otherwise, start walking sibling lists. First mismatch, return false. + RefAST sibling=this; + for (;sibling && sub; + sibling=sibling->getNextSibling(), sub=sub->getNextSibling()) { + // as a quick optimization, check roots first. + if (!sibling->equals(sub)) + return false; + // if roots match, do partial list match test on children. + if (sibling->getFirstChild()) + if (!sibling->getFirstChild()->equalsListPartial(sub->getFirstChild())) + return false; + } + + if (!sibling && sub) + // nothing left to match in this tree, but subtree has more + return false; + + // either both are null or sibling has more, but subtree doesn't + return true; +} + +/** Is tree rooted at 'this' equal to 't'? The siblings + * of 'this' are ignored. + */ +bool BaseAST::equalsTree(RefAST t) const +{ + // check roots first + if (!equals(t)) + return false; + // if roots match, do full list match test on children. + if (getFirstChild()) { + if (!getFirstChild()->equalsList(t->getFirstChild())) + return false; + } + // sibling has no kids, make sure t doesn't either + else if (t->getFirstChild()) + return false; + + return true; +} + +/** Is 'sub' a subtree of the tree rooted at 'this'? 
The siblings + * of 'this' are ignored. + */ +bool BaseAST::equalsTreePartial(RefAST sub) const +{ + // the empty tree is always a subset of any tree. + if (!sub) + return true; + + // check roots first + if (!equals(sub)) + return false; + // if roots match, do full list partial match test on children. + if (getFirstChild()) + if (!getFirstChild()->equalsListPartial(sub->getFirstChild())) + return false; + + return true; +} + +/** Walk the tree looking for all exact subtree matches. Return + * an ASTEnumerator that lets the caller walk the list + * of subtree roots found herein. + */ +ANTLR_USE_NAMESPACE(std)vector<RefAST> BaseAST::findAll(RefAST target) +{ + ANTLR_USE_NAMESPACE(std)vector<RefAST> roots; + + // the empty tree cannot result in an enumeration + if (target) { + doWorkForFindAll(roots,target,false); // find all matches recursively + } + + return roots; +} + +/** Walk the tree looking for all subtrees. Return + * an ASTEnumerator that lets the caller walk the list + * of subtree roots found herein. 
+ */ +ANTLR_USE_NAMESPACE(std)vector<RefAST> BaseAST::findAllPartial(RefAST target) +{ + ANTLR_USE_NAMESPACE(std)vector<RefAST> roots; + + // the empty tree cannot result in an enumeration + if (target) { + doWorkForFindAll(roots,target,true); // find all matches recursively + } + + return roots; +} + +RefAST BaseAST::getFirstChild() const +{ + return RefAST(down); +} + +RefAST BaseAST::getNextSibling() const +{ + return RefAST(right); +} + +ANTLR_USE_NAMESPACE(std)string BaseAST::getText() const +{ + return ""; +} + +int BaseAST::getType() const +{ + return 0; +} + +void BaseAST::removeChildren() +{ + down=nullAST; +} + +void BaseAST::setFirstChild(RefAST c) +{ + down=c; +} + +void BaseAST::setNextSibling(RefAST n) +{ + right=n; +} + +void BaseAST::setText(const ANTLR_USE_NAMESPACE(std)string& txt) +{ +} + +void BaseAST::setType(int type) +{ +} + +//void BaseAST::setVerboseStringConversion(bool verbose, +// const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& names) +//{ +// verboseStringConversion = verbose; +// tokenNames = names; +//} + +ANTLR_USE_NAMESPACE(std)string BaseAST::toString() const +{ +// if ( verboseStringConversion && +// !getText().equalsIgnoreCase(tokenNames[getType()]) && +// !getText().equalsIgnoreCase(Tool.stripFrontBack(tokenNames[getType()],"\"","\"")) ) { +// b.append('['); +// b.append(getText()); +// b.append(",<"); +// b.append(tokenNames[getType()]); +// b.append(">]"); +// return b.toString(); +// } + return getText(); +} + +ANTLR_USE_NAMESPACE(std)string BaseAST::toStringList() const +{ + ANTLR_USE_NAMESPACE(std)string ts=""; + if (getFirstChild()) { + ts+=" ( "; + ts+=toString(); + ts+=getFirstChild()->toStringList(); + ts+=" )"; + } else { + ts+=" "; + ts+=toString(); + } + if (getNextSibling()) + ts+=getNextSibling()->toStringList(); + return ts; +} + +ANTLR_USE_NAMESPACE(std)string BaseAST::toStringTree() const +{ + ANTLR_USE_NAMESPACE(std)string ts=""; + if (getFirstChild()) { + ts+=" ( "; + ts+=toString(); + 
ts+=getFirstChild()->toStringList(); + ts+=" )"; + } else { + ts+=" "; + ts+=toString(); + } + return ts; +} + +// this is nasty, but it makes the code generation easier +RefAST nullAST; +AST* const nullASTptr=0; + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/BitSet.cpp b/poxml/antlr/src/BitSet.cpp new file mode 100644 index 00000000..a0a1b110 --- /dev/null +++ b/poxml/antlr/src/BitSet.cpp @@ -0,0 +1,76 @@ +#include "antlr/BitSet.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** A BitSet to replace java.util.BitSet. + * Primary differences are that most set operators return new sets + * as opposed to oring and anding "in place". Further, a number of + * operations were added. I cannot contain a BitSet because there + * is no way to access the internal bits (which I need for speed) + * and, because it is final, I cannot subclass to add functionality. + * Consider defining set degree. Without access to the bits, I must + * call a method n times to test the ith bit...ack! + * + * Also seems like or() from util is wrong when size of incoming set is bigger + * than this.length. + * + * This is a C++ version of the Java class described above, with only + * a handful of the methods implemented, because we don't need the + * others at runtime. It's really just a wrapper around vector<bool>, + * which should probably be changed to a wrapper around bitset, once + * bitset is more widely available. + * + * @author Terence Parr, MageLang Institute + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ +BitSet::BitSet(int nbits) + : storage(nbits) +{ + for (int i=0;i<nbits;i++) { + storage[i] = false; + } +} + +BitSet::BitSet(const unsigned long* bits_,int nlongs) + : storage(nlongs*32) +{ + for ( int i = 0 ; i < nlongs*32; i++) { + storage[i] = (bits_[i>>5] & (1UL << (i&31))) ? 
true : false; + } +} + +BitSet::~BitSet() +{ +} + +void BitSet::add(int el) +{ + if ( el < 0 ) + throw ANTLR_USE_NAMESPACE(std)out_of_range(ANTLR_USE_NAMESPACE(std)string("antlr::BitSet.cpp line 49")); + + if( static_cast<unsigned int>(el) >= storage.size() ) + storage.resize( el+1, false ); + + storage[el] = true; +} + +bool BitSet::member(int el) const +{ + if ( el < 0 || static_cast<unsigned int>(el) >= storage.size()) + return false; + + return storage[el]; +} + +ANTLR_USE_NAMESPACE(std)vector<int> BitSet::toArray() const +{ + ANTLR_USE_NAMESPACE(std)vector<int> elems; + for (unsigned int i=0;i<storage.size();i++) { + if (storage[i]) + elems.push_back(i); + } + + return elems; +} + +ANTLR_END_NAMESPACE diff --git a/poxml/antlr/src/CharBuffer.cpp b/poxml/antlr/src/CharBuffer.cpp new file mode 100644 index 00000000..a43eb153 --- /dev/null +++ b/poxml/antlr/src/CharBuffer.cpp @@ -0,0 +1,67 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. 
As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +/**A Stream of characters fed to the lexer from a InputStream that can + * be rewound via mark()/rewind() methods. + * <p> + * A dynamic array is used to buffer up all the input characters. Normally, + * "k" characters are stored in the buffer. More characters may be stored during + * guess mode (testing syntactic predicate), or when LT(i>k) is referenced. + * Consumption of characters is deferred. In other words, reading the next + * character is not done by conume(), but deferred until needed by LA or LT. + * <p> + * + * @see antlr.CharQueue + */ + +#include "antlr/CharBuffer.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** Create a character buffer */ +CharBuffer::CharBuffer(ANTLR_USE_NAMESPACE(std)istream& input_) +: input(input_) +{} + +/** Get the next character from the stream */ +int CharBuffer::getChar() +{ +// try { + return input.get(); +// } +// catch (???& e) { +// throw CharStreamIOException(e); +// } +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/CharScanner.cpp b/poxml/antlr/src/CharScanner.cpp new file mode 100644 index 00000000..ff40138d --- /dev/null +++ b/poxml/antlr/src/CharScanner.cpp @@ -0,0 +1,430 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. 
An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/CharScanner.hpp" +#include "antlr/CommonToken.hpp" +#include "antlr/MismatchedCharException.hpp" +#include <map> + +#ifdef HAS_NOT_CCTYPE_H +#include <ctype.h> +#else +#include <cctype> +#endif + +#include <iostream> + +#ifdef HAS_NOT_CSTRING_H +#include <string> +#else +#include <cstring> +#endif +#include <stdlib.h> + +ANTLR_BEGIN_NAMESPACE(antlr) +ANTLR_C_USING(exit) +ANTLR_C_USING(tolower) + +#ifdef ANTLR_REALLY_NO_STRCASECMP +// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior +// on the mac has neither... 
+inline int strcasecmp(const char *s1, const char *s2) +{ + while (true) + { + char c1 = tolower(*s1++), + c2 = tolower(*s2++); + if (c1 < c2) return -1; + if (c1 > c2) return 1; + if (c1 == 0) return 0; + } +} +#else +#ifdef NO_STRCASECMP +ANTLR_C_USING(stricmp) +#else +ANTLR_C_USING(strcasecmp) +#endif +#endif + +CharScannerLiteralsLess::CharScannerLiteralsLess(const CharScanner* theScanner) +: scanner(theScanner) +{} + +bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const +{ + if (scanner->getCaseSensitiveLiterals()) { + return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y); + } else { +#ifdef NO_STRCASECMP + return (stricmp(x.c_str(),y.c_str())<0); +#else + return (strcasecmp(x.c_str(),y.c_str())<0); +#endif + } +} + +CharScanner::CharScanner(InputBuffer& cb) + : saveConsumedInput(true) //, caseSensitiveLiterals(true) + , literals(CharScannerLiteralsLess(this)) + , inputState(new LexerInputState(cb)) + , commitToPath(false) + , traceDepth(0) +{ + setTokenObjectFactory(&CommonToken::factory); +} + +CharScanner::CharScanner(InputBuffer* cb) + : saveConsumedInput(true) //, caseSensitiveLiterals(true) + , literals(CharScannerLiteralsLess(this)) + , inputState(new LexerInputState(cb)) + , commitToPath(false) + , traceDepth(0) +{ + setTokenObjectFactory(&CommonToken::factory); +} + +CharScanner::CharScanner(const LexerSharedInputState& state) + : saveConsumedInput(true) //, caseSensitiveLiterals(true) + , literals(CharScannerLiteralsLess(this)) + , inputState(state) + , commitToPath(false) + , traceDepth(0) +{ + setTokenObjectFactory(&CommonToken::factory); +} + +CharScanner::~CharScanner() +{ +} + +void CharScanner::append(char c) +{ + if (saveConsumedInput) { + int l = text.length(); + if ((l%256) == 0) text.reserve(l+256); + text.replace(l,0,&c,1); + } +} + +void CharScanner::append(const ANTLR_USE_NAMESPACE(std)string& s) +{ + if (saveConsumedInput) + text+=s; +} + 
+void CharScanner::commit() +{ + inputState->getInput().commit(); +} + +void CharScanner::consume() +{ + if (inputState->guessing == 0) { + int c = LA(1); + if (caseSensitive) { + append(c); + } else { + // use input.LA(), not LA(), to get original case + // CharScanner.LA() would toLower it. + append(inputState->getInput().LA(1)); + } + if (c == '\t') { + tab(); + } + else { + inputState->column++; + } + } + inputState->getInput().consume(); +} + +/** Consume chars until one matches the given char */ +void CharScanner::consumeUntil(int c) +{ + while (LA(1) != EOF_CHAR && LA(1) != c) + { + consume(); + } +} + +/** Consume chars until one matches the given set */ +void CharScanner::consumeUntil(const BitSet& set) +{ + while (LA(1) != EOF_CHAR && !set.member(LA(1))) { + consume(); + } +} + +bool CharScanner::getCaseSensitive() const +{ return caseSensitive; } + +//bool CharScanner::getCaseSensitiveLiterals() const +//{ return caseSensitiveLiterals; } + +int CharScanner::getColumn() const +{ return inputState->column; } + +void CharScanner::setColumn(int c) +{ inputState->column = c; } + +bool CharScanner::getCommitToPath() const +{ return commitToPath; } + +const ANTLR_USE_NAMESPACE(std)string& CharScanner::getFilename() const +{ return inputState->filename; } + +InputBuffer& CharScanner::getInputBuffer() +{ return inputState->getInput(); } + +LexerSharedInputState CharScanner::getInputState() +{ return inputState; } + +int CharScanner::getLine() const +{ return inputState->line; } + +/** return a copy of the current text buffer */ +const ANTLR_USE_NAMESPACE(std)string& CharScanner::getText() const +{ return text; } + +RefToken CharScanner::getTokenObject() const +{ return _returnToken; } + +RefToken CharScanner::makeToken(int t) +{ + RefToken tok=tokenFactory(); + tok->setType(t); + tok->setColumn(inputState->tokenStartColumn); + tok->setLine(inputState->tokenStartLine); + return tok; +} + +int CharScanner::mark() +{ + return inputState->getInput().mark(); +} + 
+void CharScanner::match(int c) +{ + if ( LA(1) != c ) { + throw MismatchedCharException(LA(1),c,false,this); + } + consume(); +} + +void CharScanner::match(const BitSet& b) +{ + if (!b.member(LA(1))) { + throw MismatchedCharException(LA(1),b,false,this); + } + consume(); +} + +void CharScanner::match(const ANTLR_USE_NAMESPACE(std)string& s) +{ + int len = s.length(); + for (int i=0; i<len; i++) { + if ( LA(1) != s[i] ) { + throw MismatchedCharException(LA(1),s[i],false,this); + } + consume(); + } +} + +void CharScanner::matchNot(int c) +{ + if ( LA(1) == c ) { + throw MismatchedCharException(LA(1),c,true,this); + } + consume(); +} + +void CharScanner::matchRange(int c1, int c2) +{ + if (LA(1)<c1 || LA(1)>c2) { + throw MismatchedCharException(LA(1),c1,c2,false,this); + } + consume(); +} + +void CharScanner::newline() +{ + ++inputState->line; + inputState->column=1; +} + +/** advance the current column number by an appropriate amount. + * If you do not override this to specify how much to jump for + * a tab, then tabs are counted as one char. This method is + * called from consume(). + */ +void CharScanner::tab() { + // update inputState->column as function of + // inputState->column and tab stops. + // For example, if tab stops are columns 1 and 5 etc... + // and column is 3, then add 2 to column. 
+ ++inputState->column; +} + +void CharScanner::panic() +{ + ANTLR_USE_NAMESPACE(std)cerr << "CharScanner: panic" << ANTLR_USE_NAMESPACE(std)endl; + exit(1); +} + +void CharScanner::panic(const ANTLR_USE_NAMESPACE(std)string& s) +{ + ANTLR_USE_NAMESPACE(std)cerr << "CharScanner: panic: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; + exit(1); +} + +/** Report exception errors caught in nextToken() */ +void CharScanner::reportError(const RecognitionException& ex) +{ + ANTLR_USE_NAMESPACE(std)cerr << ex.toString().c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +/** Parser error-reporting function can be overridden in subclass */ +void CharScanner::reportError(const ANTLR_USE_NAMESPACE(std)string& s) +{ + if (getFilename().empty()) + ANTLR_USE_NAMESPACE(std)cerr << "error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; + else + ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +/** Parser warning-reporting function can be overridden in subclass */ +void CharScanner::reportWarning(const ANTLR_USE_NAMESPACE(std)string& s) +{ + if (getFilename().empty()) + ANTLR_USE_NAMESPACE(std)cerr << "warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; + else + ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +void CharScanner::resetText() +{ + text=""; + inputState->tokenStartColumn = inputState->column; + inputState->tokenStartLine = inputState->line; +} + +void CharScanner::rewind(int pos) +{ + inputState->getInput().rewind(pos); +} + +void CharScanner::setCaseSensitive(bool t) +{ + caseSensitive = t; +} + +void CharScanner::setCommitToPath(bool commit) +{ + commitToPath = commit; +} + +void CharScanner::setFilename(const ANTLR_USE_NAMESPACE(std)string& f) +{ inputState->filename=f; } + +void CharScanner::setInputState(LexerSharedInputState state) +{ inputState = state; } + +void CharScanner::setLine(int l) +{ inputState->line=l; } + 
+void CharScanner::setText(const ANTLR_USE_NAMESPACE(std)string& s) +{ text=s; } + +void CharScanner::setTokenObjectFactory(factory_type factory) +{ tokenFactory=factory; } + +/** Test the token text against the literals table + * Override this method to perform a different literals test */ +int CharScanner::testLiteralsTable(int ttype) const +{ + ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text); + if (i != literals.end()) + ttype = (*i).second; + return ttype; +} + +/** Test the text passed in against the literals table + * Override this method to perform a different literals test + * This is used primarily when you want to test a portion of + * a token. + */ +int CharScanner::testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& text_, int ttype) const +{ + ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text_); + if (i != literals.end()) + ttype = (*i).second; + return ttype; +} + +/** Override this method to get more specific case handling */ +int CharScanner::toLower(int c) const +{ + return tolower(c); +} + +void CharScanner::traceIndent() +{ + for( int i = 0; i < traceDepth; i++ ) + ANTLR_USE_NAMESPACE(std)cout << " "; +} + +void CharScanner::traceIn(const ANTLR_USE_NAMESPACE(std)string& rname) +{ + traceDepth++; + traceIndent(); + ANTLR_USE_NAMESPACE(std)cout << "> lexer " << rname.c_str() << "; c==" << LA(1) << ANTLR_USE_NAMESPACE(std)endl; +} + +void CharScanner::traceOut(const ANTLR_USE_NAMESPACE(std)string& rname) +{ + traceIndent(); + ANTLR_USE_NAMESPACE(std)cout << "< lexer " << rname.c_str() << "; c==" << LA(1) << ANTLR_USE_NAMESPACE(std)endl; + traceDepth--; +} + +void CharScanner::uponEOF() +{ +} + +#ifndef NO_STATIC_CONSTS +const int CharScanner::NO_CHAR; +const int CharScanner::EOF_CHAR; +#endif + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/CommonAST.cpp b/poxml/antlr/src/CommonAST.cpp 
new file mode 100644 index 00000000..3a4067e3 --- /dev/null +++ b/poxml/antlr/src/CommonAST.cpp @@ -0,0 +1,100 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/CommonAST.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +CommonAST::CommonAST() +: BaseAST(), + ttype( Token::INVALID_TYPE ), + text("") +{ +} + +CommonAST::CommonAST(RefToken t) +: BaseAST(), + ttype( t->getType() ), + text( t->getText() ) +{ +} + +CommonAST::~CommonAST() +{ +} + +ANTLR_USE_NAMESPACE(std)string CommonAST::getText() const +{ + return text; +} + +int CommonAST::getType() const +{ + return ttype; +} + +void CommonAST::initialize(int t,const ANTLR_USE_NAMESPACE(std)string& txt) +{ + setType(t); + setText(txt); +} + +void CommonAST::initialize(RefAST t) +{ + setType(t->getType()); + setText(t->getText()); +} + +void CommonAST::initialize(RefToken t) +{ + setType(t->getType()); + setText(t->getText()); +} + +void CommonAST::setText(const ANTLR_USE_NAMESPACE(std)string& txt) +{ + text = txt; +} + +void CommonAST::setType(int type) +{ + ttype = type; +} + +RefAST CommonAST::factory() +{ + return RefAST(new CommonAST); +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/CommonASTWithHiddenTokens.cpp b/poxml/antlr/src/CommonASTWithHiddenTokens.cpp new file mode 100644 index 00000000..d6c242d2 --- /dev/null +++ b/poxml/antlr/src/CommonASTWithHiddenTokens.cpp @@ -0,0 +1,29 @@ +#include "antlr/config.hpp" +#include "antlr/CommonASTWithHiddenTokens.hpp" +#include "antlr/CommonHiddenStreamToken.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +void CommonASTWithHiddenTokens::initialize(int t,const ANTLR_USE_NAMESPACE(std)string& txt) +{ + CommonAST::initialize(t,txt); +} + +void CommonASTWithHiddenTokens::initialize(RefAST t) +{ + CommonAST::initialize(t); +} + +void CommonASTWithHiddenTokens::initialize(RefToken t) 
+{ + CommonAST::initialize(t); + hiddenBefore = static_cast<CommonHiddenStreamToken*>(t.get())->getHiddenBefore(); + hiddenAfter = static_cast<CommonHiddenStreamToken*>(t.get())->getHiddenAfter(); +} + +RefAST CommonASTWithHiddenTokens::factory() +{ + return RefAST(new CommonASTWithHiddenTokens); +} + +ANTLR_END_NAMESPACE diff --git a/poxml/antlr/src/CommonHiddenStreamToken.cpp b/poxml/antlr/src/CommonHiddenStreamToken.cpp new file mode 100644 index 00000000..d33927cc --- /dev/null +++ b/poxml/antlr/src/CommonHiddenStreamToken.cpp @@ -0,0 +1,46 @@ +#include "antlr/CommonHiddenStreamToken.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +CommonHiddenStreamToken::CommonHiddenStreamToken() +: CommonToken() +{ +} + +CommonHiddenStreamToken::CommonHiddenStreamToken(int t, const ANTLR_USE_NAMESPACE(std)string& txt) +: CommonToken(t,txt) +{ +} + +CommonHiddenStreamToken::CommonHiddenStreamToken(const ANTLR_USE_NAMESPACE(std)string& s) +: CommonToken(s) +{ +} + +RefToken CommonHiddenStreamToken::getHiddenAfter() +{ + return hiddenAfter; +} + +RefToken CommonHiddenStreamToken::getHiddenBefore() +{ + return hiddenBefore; +} + +RefToken CommonHiddenStreamToken::factory() +{ + return RefToken(new CommonHiddenStreamToken); +} + +void CommonHiddenStreamToken::setHiddenAfter(RefToken t) +{ + hiddenAfter = t; +} + +void CommonHiddenStreamToken::setHiddenBefore(RefToken t) +{ + hiddenBefore = t; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/CommonToken.cpp b/poxml/antlr/src/CommonToken.cpp new file mode 100644 index 00000000..ff60bd79 --- /dev/null +++ b/poxml/antlr/src/CommonToken.cpp @@ -0,0 +1,81 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. 
+ * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/CommonToken.hpp" +#include "antlr/String.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +CommonToken::CommonToken() : Token(), line(1), col(1), text("") +{} + +CommonToken::CommonToken(int t, const ANTLR_USE_NAMESPACE(std)string& txt) + : Token(t), line(1), col(1), text(txt) +{} + +CommonToken::CommonToken(const ANTLR_USE_NAMESPACE(std)string& s) + : Token(), line(1), col(1), text(s) +{} + +int CommonToken::getLine() const +{ return line; } + +ANTLR_USE_NAMESPACE(std)string CommonToken::getText() const +{ return text; } + +void CommonToken::setLine(int l) +{ line=l; } + +void CommonToken::setText(const ANTLR_USE_NAMESPACE(std)string& s) +{ text=s; } + +ANTLR_USE_NAMESPACE(std)string CommonToken::toString() const +{ + return "[\""+getText()+"\",<"+type+">,line="+line+"]"; +} + +int CommonToken::getColumn() const +{ return col; } + +void CommonToken::setColumn(int c) +{ col=c; } + +bool 
CommonToken::isInvalid() const +{ return type==INVALID_TYPE; } + +RefToken CommonToken::factory() +{ + return RefToken(new CommonToken); +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/InputBuffer.cpp b/poxml/antlr/src/InputBuffer.cpp new file mode 100644 index 00000000..058c32ab --- /dev/null +++ b/poxml/antlr/src/InputBuffer.cpp @@ -0,0 +1,109 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +/**A Stream of characters fed to the lexer from a InputStream that can + * be rewound via mark()/rewind() methods. 
+ * <p> + * A dynamic array is used to buffer up all the input characters. Normally, + * "k" characters are stored in the buffer. More characters may be stored during + * guess mode (testing syntactic predicate), or when LT(i>k) is referenced. + * Consumption of characters is deferred. In other words, reading the next + * character is not done by conume(), but deferred until needed by LA or LT. + * <p> + * + * @see antlr.CharQueue + */ + +#include "antlr/InputBuffer.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** Create a character buffer */ +InputBuffer::InputBuffer() +: nMarkers(0), markerOffset(0), numToConsume(0) +{} + +/** This method updates the state of the input buffer so that + * the text matched since the most recent mark() is no longer + * held by the buffer. So, you either do a mark/rewind for + * failed predicate or mark/commit to keep on parsing without + * rewinding the input. + */ +void InputBuffer::commit() +{ + nMarkers--; +} + +/** Mark another character for deferred consumption */ +void InputBuffer::consume() +{ + numToConsume++; +} + +/** Ensure that the character buffer is sufficiently full */ +void InputBuffer::fill(int amount) +{ + syncConsume(); + // Fill the buffer sufficiently to hold needed characters + while (queue.entries() < amount + markerOffset) { + // Append the next character + queue.append(getChar()); + } +} + +bool InputBuffer::isMarked() const +{ + return (nMarkers != 0); +} + +/**Return an integer marker that can be used to rewind the buffer to + * its current state. + */ +int InputBuffer::mark() +{ + syncConsume(); + nMarkers++; + return markerOffset; +} + +/**Rewind the character buffer to a marker. 
+ * @param mark Marker returned previously from mark() + */ +void InputBuffer::rewind(int mark) +{ + syncConsume(); + markerOffset = mark; + nMarkers--; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/LLkParser.cpp b/poxml/antlr/src/LLkParser.cpp new file mode 100644 index 00000000..2f21cd8b --- /dev/null +++ b/poxml/antlr/src/LLkParser.cpp @@ -0,0 +1,105 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/LLkParser.hpp" +#include <iostream> + +ANTLR_BEGIN_NAMESPACE(antlr) + +/**An LL(k) parser. 
+ * + * @see antlr.Token + * @see antlr.TokenBuffer + * @see antlr.LL1Parser + */ + +// LLkParser(int k_); + +LLkParser::LLkParser(const ParserSharedInputState& state, int k_) +: Parser(state), k(k_) +{} + +LLkParser::LLkParser(TokenBuffer& tokenBuf, int k_) +: Parser(tokenBuf), k(k_) +{} + +LLkParser::LLkParser(TokenStream& lexer, int k_) +: Parser(new TokenBuffer(lexer)), k(k_) +{ +} + +/**Consume another token from the input stream. Can only write sequentially! + * If you need 3 tokens ahead, you must consume() 3 times. + * <p> + * Note that it is possible to overwrite tokens that have not been matched. + * For example, calling consume() 3 times when k=2, means that the first token + * consumed will be overwritten with the 3rd. + */ +void LLkParser::consume() +{ inputState->getInput().consume(); } + +int LLkParser::LA(int i) +{ return inputState->getInput().LA(i); } + +RefToken LLkParser::LT(int i) +{ return inputState->getInput().LT(i); } + +void LLkParser::trace(const ANTLR_USE_NAMESPACE(std)string& ee, const ANTLR_USE_NAMESPACE(std)string& rname) +{ + traceIndent(); + + ANTLR_USE_NAMESPACE(std)cout << ee.c_str() << rname.c_str() << ((inputState->guessing>0)?"; [guessing]":"; "); + + for (int i = 1; i <= k; i++) + { + if (i != 1) { + ANTLR_USE_NAMESPACE(std)cout << ", "; + } + ANTLR_USE_NAMESPACE(std)cout << "LA(" << i << ")==" << LT(i)->getText().c_str(); + } + + ANTLR_USE_NAMESPACE(std)cout << ANTLR_USE_NAMESPACE(std)endl; +} + +void LLkParser::traceIn(const ANTLR_USE_NAMESPACE(std)string& rname) +{ + traceDepth++; + trace("> ",rname); +} + +void LLkParser::traceOut(const ANTLR_USE_NAMESPACE(std)string& rname) +{ + trace("< ",rname); + traceDepth--; +} + +ANTLR_END_NAMESPACE diff --git a/poxml/antlr/src/LexerSharedInputState.cpp b/poxml/antlr/src/LexerSharedInputState.cpp new file mode 100644 index 00000000..a95f33a8 --- /dev/null +++ b/poxml/antlr/src/LexerSharedInputState.cpp @@ -0,0 +1,55 @@ +#include "antlr/LexerSharedInputState.hpp" +#include 
"antlr/CharBuffer.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** This object contains the data associated with an + * input stream of characters. Multiple lexers + * share a single LexerSharedInputState to lex + * the same input stream. + */ + +LexerInputState::LexerInputState(InputBuffer* inbuf) +: column(1) +, line(1) +, tokenStartColumn(1) +, tokenStartLine(1) +, guessing(0) +, filename("") +, input(inbuf) +, inputResponsible(true) +{ +} + +LexerInputState::LexerInputState(InputBuffer& inbuf) +: column(1) +, line(1) +, tokenStartColumn(1) +, tokenStartLine(1) +, guessing(0) +, filename("") +, input(&inbuf) +, inputResponsible(false) +{ +} + +LexerInputState::LexerInputState(ANTLR_USE_NAMESPACE(std)istream& in) +: column(1) +, line(1) +, tokenStartColumn(1) +, tokenStartLine(1) +, guessing(0) +, filename("") +, input(new CharBuffer(in)) +, inputResponsible(true) +{ +} + +LexerInputState::~LexerInputState() +{ + if (inputResponsible) + delete input; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/Makefile.am b/poxml/antlr/src/Makefile.am new file mode 100644 index 00000000..7a5d2426 --- /dev/null +++ b/poxml/antlr/src/Makefile.am @@ -0,0 +1,39 @@ + +# Make #include <antlr/xxx> work.. +INCLUDES=-I$(srcdir)/.. 
+KDE_CXXFLAGS = $(USE_EXCEPTIONS) + +noinst_LTLIBRARIES = libantlr.la + +libantlr_la_LDFLAGS = -no-undefined + +libantlr_la_SOURCES = \ + ANTLRException.cpp \ + ASTFactory.cpp \ + ASTRefCount.cpp \ + BaseAST.cpp \ + BitSet.cpp \ + CharBuffer.cpp \ + CharScanner.cpp \ + CommonAST.cpp \ + CommonASTWithHiddenTokens.cpp \ + CommonHiddenStreamToken.cpp \ + CommonToken.cpp \ + InputBuffer.cpp \ + LLkParser.cpp \ + LexerSharedInputState.cpp \ + MismatchedCharException.cpp \ + MismatchedTokenException.cpp \ + NoViableAltException.cpp \ + NoViableAltForCharException.cpp \ + Parser.cpp \ + ParserSharedInputState.cpp \ + RecognitionException.cpp \ + String.cpp \ + Token.cpp \ + TokenBuffer.cpp \ + TokenStreamBasicFilter.cpp \ + TokenStreamHiddenTokenFilter.cpp \ + TokenStreamSelector.cpp \ + TreeParser.cpp \ + TreeParserSharedInputState.cpp diff --git a/poxml/antlr/src/MismatchedCharException.cpp b/poxml/antlr/src/MismatchedCharException.cpp new file mode 100644 index 00000000..4dede0e8 --- /dev/null +++ b/poxml/antlr/src/MismatchedCharException.cpp @@ -0,0 +1,153 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. 
In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/MismatchedCharException.hpp" +#include "antlr/String.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +MismatchedCharException::MismatchedCharException() +: RecognitionException("Mismatched char") +{} + +// Expected range / not range +MismatchedCharException::MismatchedCharException( + int c, + int lower, + int upper_, + bool matchNot, + CharScanner* scanner_ +) : RecognitionException("Mismatched char", + scanner_->getFilename(), + scanner_->getLine(), + scanner_->getColumn()) + , mismatchType(matchNot ? NOT_RANGE : RANGE) + , foundChar(c) + , expecting(lower) + , upper(upper_) + , scanner(scanner_) +{ +} + +// Expected token / not token +MismatchedCharException::MismatchedCharException( + int c, + int expecting_, + bool matchNot, + CharScanner* scanner_ +) : RecognitionException("Mismatched char", + scanner_->getFilename(), + scanner_->getLine(), + scanner_->getColumn()) + , mismatchType(matchNot ? NOT_CHAR : CHAR) + , foundChar(c) + , expecting(expecting_) + , scanner(scanner_) +{ +} + +// Expected BitSet / not BitSet +MismatchedCharException::MismatchedCharException( + int c, + BitSet set_, + bool matchNot, + CharScanner* scanner_ +) : RecognitionException("Mismatched char", + scanner_->getFilename(), + scanner_->getLine(), + scanner_->getColumn()) + , mismatchType(matchNot ? 
NOT_SET : SET) + , foundChar(c) + , set(set_) + , scanner(scanner_) +{ +} + +MismatchedCharException::MismatchedCharException( + const ANTLR_USE_NAMESPACE(std)string& s, + int line +) : RecognitionException(s) +{ +} + +/** + * Returns the error message that happened on the line/col given. + * Copied from toString(). + */ +ANTLR_USE_NAMESPACE(std)string MismatchedCharException::getMessage() const +{ + ANTLR_USE_NAMESPACE(std)string s; + + switch (mismatchType) { + case CHAR : + s += "expecting '" + charName(expecting) + "', found '" + charName(foundChar) + "'"; + break; + case NOT_CHAR : + s += "expecting anything but '" + charName(expecting) + "'; got it anyway"; + break; + case RANGE : + s += "expecting token in range: '" + charName(expecting) + "'..'" + charName(upper) + "', found '" + charName(foundChar) + "'"; + break; + case NOT_RANGE : + s += "expecting token NOT in range: " + charName(expecting) + "'..'" + charName(upper) + "', found '" + charName(foundChar) + "'"; + break; + case SET : + case NOT_SET : + { + s += ANTLR_USE_NAMESPACE(std)string("expecting ") + (mismatchType == NOT_SET ? 
"NOT " : "") + "one of ("; + ANTLR_USE_NAMESPACE(std)vector<int> elems = set.toArray(); + for (int i = 0; i < (int) elems.size(); i++) { + s += " '"; + s += charName(elems[i]); + s += "'"; + } + s += "), found '" + charName(foundChar) + "'"; + } + break; + default : + s += RecognitionException::getMessage(); + break; + } + + return s; +} + +#ifndef NO_STATIC_CONSTS +const int MismatchedCharException::CHAR; +const int MismatchedCharException::NOT_CHAR; +const int MismatchedCharException::RANGE; +const int MismatchedCharException::NOT_RANGE; +const int MismatchedCharException::SET; +const int MismatchedCharException::NOT_SET; +#endif + +ANTLR_END_NAMESPACE diff --git a/poxml/antlr/src/MismatchedTokenException.cpp b/poxml/antlr/src/MismatchedTokenException.cpp new file mode 100644 index 00000000..b8b10808 --- /dev/null +++ b/poxml/antlr/src/MismatchedTokenException.cpp @@ -0,0 +1,223 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. 
As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/MismatchedTokenException.hpp" +#include "antlr/String.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +MismatchedTokenException::MismatchedTokenException() +: RecognitionException("Mismatched Token: expecting any AST node","<AST>",1) +, token(0) +, node(nullASTptr) +{ +} + +// Expected range / not range +MismatchedTokenException::MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefAST node_, + int lower, + int upper_, + bool matchNot +) : RecognitionException("Mismatched Token") + , tokenNames(tokenNames_) + , token(0) + , node(node_) + , tokenText( (node_ ? node_->toString(): ANTLR_USE_NAMESPACE(std)string("<empty tree>")) ) + , mismatchType(matchNot ? NOT_RANGE : RANGE) + , expecting(lower) + , upper(upper_) +{ + fileName = "<AST>"; +} + +// Expected token / not token +MismatchedTokenException::MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefAST node_, + int expecting_, + bool matchNot +) : RecognitionException("Mismatched Token") + , tokenNames(tokenNames_) + , token(0) + , node(node_) + , tokenText( (node_ ? node_->toString(): ANTLR_USE_NAMESPACE(std)string("<empty tree>")) ) + , mismatchType(matchNot ? 
NOT_TOKEN : TOKEN) + , expecting(expecting_) +{ + fileName = "<AST>"; +} + +// Expected BitSet / not BitSet +MismatchedTokenException::MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefAST node_, + BitSet set_, + bool matchNot +) : RecognitionException("Mismatched Token") + , tokenNames(tokenNames_) + , token(0) + , node(node_) + , tokenText( (node_ ? node_->toString(): ANTLR_USE_NAMESPACE(std)string("<empty tree>")) ) + , mismatchType(matchNot ? NOT_SET : SET) + , set(set_) +{ + fileName = "<AST>"; +} + +// Expected range / not range +MismatchedTokenException::MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefToken token_, + int lower, + int upper_, + bool matchNot, + const ANTLR_USE_NAMESPACE(std)string& fileName_ +) : RecognitionException("Mismatched Token",fileName_,token_->getLine(),token_->getColumn()) + , tokenNames(tokenNames_) + , token(token_) + , node(nullASTptr) + , tokenText(token_->getText()) + , mismatchType(matchNot ? NOT_RANGE : RANGE) + , expecting(lower) + , upper(upper_) +{ +} + +// Expected token / not token +MismatchedTokenException::MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefToken token_, + int expecting_, + bool matchNot, + const ANTLR_USE_NAMESPACE(std)string& fileName_ +) : RecognitionException("Mismatched Token",fileName_,token_->getLine(),token_->getColumn()) + , tokenNames(tokenNames_) + , token(token_) + , node(nullASTptr) + , tokenText(token_->getText()) + , mismatchType(matchNot ? 
NOT_TOKEN : TOKEN) + , expecting(expecting_) +{ +} + +// Expected BitSet / not BitSet +MismatchedTokenException::MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefToken token_, + BitSet set_, + bool matchNot, + const ANTLR_USE_NAMESPACE(std)string& fileName_ +) : RecognitionException("Mismatched Token",fileName_,token_->getLine(),token_->getColumn()) + , tokenNames(tokenNames_) + , token(token_) + , node(nullASTptr) + , tokenText(token_->getText()) + , mismatchType(matchNot ? NOT_SET : SET) + , set(set_) +{ +} + +// deprecated As of ANTLR 2.7.0 +ANTLR_USE_NAMESPACE(std)string MismatchedTokenException::getErrorMessage() const +{ + return getMessage(); +} + +ANTLR_USE_NAMESPACE(std)string MismatchedTokenException::getMessage() const +{ + ANTLR_USE_NAMESPACE(std)string s; + switch (mismatchType) { + case TOKEN: + s += "expecting " + tokenName(expecting) + ", found '" + tokenText + "'"; + break; + case NOT_TOKEN: + s += "expecting anything but " + tokenName(expecting) + "; got it anyway"; + break; + case RANGE: + s += "expecting token in range: " + tokenName(expecting) + ".." + tokenName(upper) + ", found '" + tokenText + "'"; + break; + case NOT_RANGE: + s += "expecting token NOT in range: " + tokenName(expecting) + ".." + tokenName(upper) + ", found '" + tokenText + "'"; + break; + case SET: + case NOT_SET: + { + s += ANTLR_USE_NAMESPACE(std)string("expecting ") + (mismatchType == NOT_SET ? 
"NOT " : "") + "one of ("; + ANTLR_USE_NAMESPACE(std)vector<int> elems = set.toArray(); + for (int i = 0; i < (int) elems.size(); i++) + { + s += " "; + s += tokenName(elems[i]); + } + s += "), found '" + tokenText + "'"; + } + break; + default: + s = RecognitionException::getMessage(); + break; + } + return s; +} + +ANTLR_USE_NAMESPACE(std)string MismatchedTokenException::tokenName(int tokenType) const +{ + if (tokenType == Token::INVALID_TYPE) { + return "<Set of tokens>"; + } + else if (tokenType < 0 || tokenType >= (int) tokenNames.size()) { + return ANTLR_USE_NAMESPACE(std)string("<") + tokenType + ">"; + } + else { + return tokenNames[tokenType]; + } +} + +ANTLR_USE_NAMESPACE(std)string MismatchedTokenException::toString() const { + if (token) { + return getFileLineString() + getMessage(); + } + return getMessage(); +} + +#ifndef NO_STATIC_CONSTS +const int MismatchedTokenException::TOKEN; +const int MismatchedTokenException::NOT_TOKEN; +const int MismatchedTokenException::RANGE; +const int MismatchedTokenException::NOT_RANGE; +const int MismatchedTokenException::SET; +const int MismatchedTokenException::NOT_SET; +#endif + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/NoViableAltException.cpp b/poxml/antlr/src/NoViableAltException.cpp new file mode 100644 index 00000000..433f4325 --- /dev/null +++ b/poxml/antlr/src/NoViableAltException.cpp @@ -0,0 +1,82 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. 
By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/NoViableAltException.hpp" +#include "antlr/String.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +NoViableAltException::NoViableAltException(RefAST t) +: RecognitionException("NoViableAlt") +, token(0) +, node(t) +{ + fileName = "<AST>"; +} + +NoViableAltException::NoViableAltException(RefToken t,const ANTLR_USE_NAMESPACE(std)string& fileName_) +: RecognitionException("NoViableAlt") // line ")+t.getLine()+" token is "+t.getText()) +, token(t) +, node(nullASTptr) +{ + line = t->getLine(); + column = t->getColumn(); + fileName = fileName_; +} + +ANTLR_USE_NAMESPACE(std)string NoViableAltException::getErrorMessage() const +{ + return getMessage(); +} + +ANTLR_USE_NAMESPACE(std)string NoViableAltException::getMessage() const +{ + if (token) + return ANTLR_USE_NAMESPACE(std)string("unexpected token: ")+token->getText(); + + // must a tree parser error if token==null + if (!node) { + return "unexpected end of subtree"; + } + return ANTLR_USE_NAMESPACE(std)string("unexpected AST node: ")+node->toString(); +} + +ANTLR_USE_NAMESPACE(std)string 
NoViableAltException::toString() const +{ + if (token) + return getFileLineString()+getMessage(); + else + return getMessage(); +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/NoViableAltForCharException.cpp b/poxml/antlr/src/NoViableAltForCharException.cpp new file mode 100644 index 00000000..2ff9120f --- /dev/null +++ b/poxml/antlr/src/NoViableAltForCharException.cpp @@ -0,0 +1,71 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Institute + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Institute + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/NoViableAltForCharException.hpp" +#include "antlr/String.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +NoViableAltForCharException::NoViableAltForCharException(int c, CharScanner* scanner) +: RecognitionException("NoViableAlt") +, foundChar(c) +{ + line = scanner->getLine(); + fileName = scanner->getFilename(); +} + +NoViableAltForCharException::NoViableAltForCharException(int c, const ANTLR_USE_NAMESPACE(std)string& fileName_, int line_) +: RecognitionException("NoViableAlt") +, foundChar(c) +{ + line = line_; + fileName = fileName_; +} + +/** + * @deprecated As of ANTLR 2.7.0 + */ +ANTLR_USE_NAMESPACE(std)string NoViableAltForCharException::getErrorMessage() const +{ + return getMessage(); +} + +/** + * Returns a clean error message (no line number/column information) + */ +ANTLR_USE_NAMESPACE(std)string NoViableAltForCharException::getMessage() const +{ + return ANTLR_USE_NAMESPACE(std)string("unexpected char: ")+charName(foundChar); +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/Parser.cpp b/poxml/antlr/src/Parser.cpp new file mode 100644 index 00000000..5a0388d4 --- /dev/null +++ b/poxml/antlr/src/Parser.cpp @@ -0,0 +1,304 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. 
However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/Parser.hpp" + +#include "antlr/BitSet.hpp" +#include "antlr/TokenBuffer.hpp" +#include "antlr/MismatchedTokenException.hpp" +//#include "antlr/ASTFactory.hpp" +#include <iostream> +#include <stdlib.h> + +ANTLR_BEGIN_NAMESPACE(antlr) +ANTLR_C_USING(exit) + +/**A generic ANTLR parser (LL(k) for k>=1) containing a bunch of + * utility routines useful at any lookahead depth. We distinguish between + * the LL(1) and LL(k) parsers because of efficiency. This may not be + * necessary in the near future. + * + * Each parser object contains the state of the parse including a lookahead + * cache (the form of which is determined by the subclass), whether or + * not the parser is in guess mode, where tokens come from, etc... + * + * <p> + * During <b>guess</b> mode, the current lookahead token(s) and token type(s) + * cache must be saved because the token stream may not have been informed + * to save the token (via <tt>mark</tt>) before the <tt>try</tt> block. 
+ * Guessing is started by: + * <ol> + * <li>saving the lookahead cache. + * <li>marking the current position in the TokenBuffer. + * <li>increasing the guessing level. + * </ol> + * + * After guessing, the parser state is restored by: + * <ol> + * <li>restoring the lookahead cache. + * <li>rewinding the TokenBuffer. + * <li>decreasing the guessing level. + * </ol> + * + * @see antlr.Token + * @see antlr.TokenBuffer + * @see antlr.TokenStream + * @see antlr.LL1Parser + * @see antlr.LLkParser + */ + +bool DEBUG_PARSER=false; + +Parser::Parser(TokenBuffer& input) +: inputState(new ParserInputState(input)), traceDepth(0) +{ +} + +Parser::Parser(TokenBuffer* input) +: inputState(new ParserInputState(input)), traceDepth(0) +{ +} + +Parser::Parser(const ParserSharedInputState& state) +: inputState(state), traceDepth(0) +{ +} + +Parser::~Parser() +{ +} + +void Parser::setTokenNames(const char** tokenNames_) +{ + while (*tokenNames_) { + tokenNames.push_back(*(tokenNames_++)); + } +} + +/** Consume tokens until one matches the given token */ +void Parser::consumeUntil(int tokenType) +{ + while (LA(1) != Token::EOF_TYPE && LA(1) != tokenType) + consume(); +} + +/** Consume tokens until one matches the given token set */ +void Parser::consumeUntil(const BitSet& set) +{ + while (LA(1) != Token::EOF_TYPE && !set.member(LA(1))) + consume(); +} + +/** Get the AST return value squirreled away in the parser */ +RefAST Parser::getAST() +{ + return returnAST; +} + +ASTFactory& Parser::getASTFactory() +{ + return astFactory; +} + +ANTLR_USE_NAMESPACE(std)string Parser::getFilename() const +{ + return inputState->filename; +} + +ParserSharedInputState Parser::getInputState() const +{ + return inputState; +} + +ANTLR_USE_NAMESPACE(std)string Parser::getTokenName(int num) const +{ + return tokenNames[num]; +} + +ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string> Parser::getTokenNames() const +{ + return tokenNames; +} + +// Forwarded to TokenBuffer +int Parser::mark() +{ + 
return inputState->getInput().mark(); +} + +/**Make sure current lookahead symbol matches token type <tt>t</tt>. + * Throw an exception upon mismatch, which is catch by either the + * error handler or by the syntactic predicate. + */ +void Parser::match(int t) +{ + if ( DEBUG_PARSER ) + { + traceIndent(); + ANTLR_USE_NAMESPACE(std)cout << "enter match(" << t << ") with LA(1)=" << LA(1) << ANTLR_USE_NAMESPACE(std)endl; + } + if ( LA(1)!=t ) { + if ( DEBUG_PARSER ) + { + traceIndent(); + ANTLR_USE_NAMESPACE(std)cout << "token mismatch: " << LA(1) << "!=" << t << ANTLR_USE_NAMESPACE(std)endl; + } + throw MismatchedTokenException(tokenNames, LT(1), t, false, getFilename()); + } else { + // mark token as consumed -- fetch next token deferred until LA/LT + consume(); + } +} + +/**Make sure current lookahead symbol matches the given set + * Throw an exception upon mismatch, which is catch by either the + * error handler or by the syntactic predicate. + */ +void Parser::match(const BitSet& b) +{ + if ( DEBUG_PARSER ) + { + traceIndent(); + ANTLR_USE_NAMESPACE(std)cout << "enter match(" << "bitset" /*b.toString()*/ + << ") with LA(1)=" << LA(1) << ANTLR_USE_NAMESPACE(std)endl; + } + if ( !b.member(LA(1)) ) { + if ( DEBUG_PARSER ) + { + traceIndent(); + ANTLR_USE_NAMESPACE(std)cout << "token mismatch: " << LA(1) << " not member of " + << "bitset" /*b.toString()*/ << ANTLR_USE_NAMESPACE(std)endl; + } + throw MismatchedTokenException(tokenNames, LT(1), b, false, getFilename()); + } else { + // mark token as consumed -- fetch next token deferred until LA/LT + consume(); + } +} + +void Parser::matchNot(int t) +{ + if ( LA(1)==t ) { + // Throws inverted-sense exception + throw MismatchedTokenException(tokenNames, LT(1), t, true, getFilename()); + } else { + // mark token as consumed -- fetch next token deferred until LA/LT + consume(); + } +} + +void Parser::panic() +{ + ANTLR_USE_NAMESPACE(std)cerr << "Parser: panic" << ANTLR_USE_NAMESPACE(std)endl; + exit(1); +} + +/** Parser 
error-reporting function can be overridden in subclass */ +void Parser::reportError(const RecognitionException& ex) +{ + ANTLR_USE_NAMESPACE(std)cerr << ex.toString().c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +/** Parser error-reporting function can be overridden in subclass */ +void Parser::reportError(const ANTLR_USE_NAMESPACE(std)string& s) +{ + if ( getFilename().empty() ) + ANTLR_USE_NAMESPACE(std)cerr << "error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; + else + ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +/** Parser warning-reporting function can be overridden in subclass */ +void Parser::reportWarning(const ANTLR_USE_NAMESPACE(std)string& s) +{ + if ( getFilename().empty() ) + ANTLR_USE_NAMESPACE(std)cerr << "warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; + else + ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +void Parser::rewind(int pos) +{ + inputState->getInput().rewind(pos); +} + +/** Set the object used to generate ASTs */ +// void setASTFactory(ASTFactory astFactory_); + +/** Specify the type of node to create during tree building */ +void Parser::setASTNodeFactory(ASTFactory::factory_type factory) +{ + astFactory.setASTNodeFactory(factory); +} + +void Parser::setFilename(const ANTLR_USE_NAMESPACE(std)string& f) +{ + inputState->filename = f; +} + +void Parser::setInputState(ParserSharedInputState state) +{ + inputState = state; +} + +/** Set or change the input token buffer */ +// void setTokenBuffer(TokenBuffer<Token>* t); + +void Parser::traceIndent() +{ + for( int i = 0; i < traceDepth; i++ ) + ANTLR_USE_NAMESPACE(std)cout << " "; +} + +void Parser::traceIn(const ANTLR_USE_NAMESPACE(std)string& rname) +{ + traceDepth++; + + for( int i = 0; i < traceDepth; i++ ) + ANTLR_USE_NAMESPACE(std)cout << " "; + + ANTLR_USE_NAMESPACE(std)cout << "> " << rname.c_str() << "; LA(1)==" << 
LT(1)->getText().c_str() << + ((inputState->guessing>0)?" [guessing]":"") << ANTLR_USE_NAMESPACE(std)endl; +} + +void Parser::traceOut(const ANTLR_USE_NAMESPACE(std)string& rname) +{ + for( int i = 0; i < traceDepth; i++ ) + ANTLR_USE_NAMESPACE(std)cout << " "; + + ANTLR_USE_NAMESPACE(std)cout << "< " << rname.c_str() << "; LA(1)==" << LT(1)->getText().c_str() << + ((inputState->guessing>0)?" [guessing]":"") << ANTLR_USE_NAMESPACE(std)endl; + + traceDepth--; +} + +ANTLR_END_NAMESPACE diff --git a/poxml/antlr/src/ParserSharedInputState.cpp b/poxml/antlr/src/ParserSharedInputState.cpp new file mode 100644 index 00000000..102aba87 --- /dev/null +++ b/poxml/antlr/src/ParserSharedInputState.cpp @@ -0,0 +1,37 @@ +#include "antlr/ParserSharedInputState.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** This object contains the data associated with an + * input stream of tokens. Multiple parsers + * share a single ParserSharedInputState to parse + * the same stream of tokens. + */ + +ParserInputState::ParserInputState(TokenBuffer* input_) +: guessing(0) +, input(input_) +, inputResponsible(true) +{ +} + +ParserInputState::ParserInputState(TokenBuffer& input_) +: guessing(0) +, input(&input_) +, inputResponsible(false) +{ +} + +ParserInputState::~ParserInputState() +{ + if (inputResponsible) + delete input; +} + +TokenBuffer& ParserInputState::getInput() +{ + return *input; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/RecognitionException.cpp b/poxml/antlr/src/RecognitionException.cpp new file mode 100644 index 00000000..1d1bd53d --- /dev/null +++ b/poxml/antlr/src/RecognitionException.cpp @@ -0,0 +1,87 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. 
An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/RecognitionException.hpp" +#include "antlr/String.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +RecognitionException::RecognitionException() +: ANTLRException("parsing error"), line(1), column(1) +{} + +RecognitionException::RecognitionException(const ANTLR_USE_NAMESPACE(std)string& s) +: ANTLRException(s) +{} + +RecognitionException::RecognitionException(const ANTLR_USE_NAMESPACE(std)string& s,const ANTLR_USE_NAMESPACE(std)string& fileName_,int line_) +: ANTLRException(s), fileName(fileName_), line(line_) +{} + +RecognitionException::RecognitionException(const ANTLR_USE_NAMESPACE(std)string& s,const ANTLR_USE_NAMESPACE(std)string& fileName_,int line_,int column_) +: ANTLRException(s), 
fileName(fileName_), line(line_), column(column_) +{} + +int RecognitionException::getColumn() const +{ + return column; +} + +ANTLR_USE_NAMESPACE(std)string RecognitionException::getErrorMessage() const +{ + return getMessage(); +} + +ANTLR_USE_NAMESPACE(std)string RecognitionException::getFileLineString() const +{ + if ( fileName.length() ) + return fileName+": "+line+": "; + else + return ANTLR_USE_NAMESPACE(std)string("line ")+line+": "; +} + +ANTLR_USE_NAMESPACE(std)string RecognitionException::getFilename() const +{ + return fileName; +} + +int RecognitionException::getLine() const +{ + return line; +} + +ANTLR_USE_NAMESPACE(std)string RecognitionException::toString() const +{ + return getFileLineString()+getMessage(); +} + +ANTLR_END_NAMESPACE diff --git a/poxml/antlr/src/String.cpp b/poxml/antlr/src/String.cpp new file mode 100644 index 00000000..6d9df7a5 --- /dev/null +++ b/poxml/antlr/src/String.cpp @@ -0,0 +1,61 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. 
As long as these
+ * guidelines are kept, we expect to continue enhancing this
+ * system and expect to make other tools available as they are
+ * completed.
+ * <p>
+ * The ANTLR gang:
+ * @version ANTLR 2.6.0 MageLang Insitute, 1998
+ * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
+ * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
+ * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a>
+ */
+
+#include "antlr/String.hpp"
+
+#ifdef HAS_NOT_CSTDIO_H
+#include <stdio.h>
+#else
+#include <cstdio>
+#endif
+
+ANTLR_BEGIN_NAMESPACE(antlr)
+ANTLR_C_USING(sprintf)
+
+/**
+ * Appends the decimal text of rhs to a copy of lhs.
+ * The number is formatted with "%d" into a 100-byte stack buffer first.
+ */
+ANTLR_USE_NAMESPACE(std)string operator+(const ANTLR_USE_NAMESPACE(std)string& lhs,int rhs)
+{
+    char tmp[100];
+    sprintf(tmp,"%d",rhs);
+    return lhs+tmp;
+}
+
+/**
+ * Returns a printable name for a character code: the literal text "EOF"
+ * when ch equals EOF, otherwise a one-character string holding ch cast to
+ * char (no escaping is applied).
+ */
+ANTLR_USE_NAMESPACE(std)string charName(int ch)
+{
+    if (ch == EOF)
+        return "EOF";
+    else {
+        return ANTLR_USE_NAMESPACE(std)string(1, static_cast<char>(ch));
+    }
+}
+
+ANTLR_END_NAMESPACE
+
diff --git a/poxml/antlr/src/Token.cpp b/poxml/antlr/src/Token.cpp
new file mode 100644
index 00000000..f307774f
--- /dev/null
+++ b/poxml/antlr/src/Token.cpp
@@ -0,0 +1,108 @@
+/**
+ * <b>SOFTWARE RIGHTS</b>
+ * <p>
+ * ANTLR 2.6.0 MageLang Insitute, 1998
+ * <p>
+ * We reserve no legal rights to the ANTLR--it is fully in the
+ * public domain. An individual or company may do whatever
+ * they wish with source code distributed with ANTLR or the
+ * code generated by ANTLR, including the incorporation of
+ * ANTLR, or its output, into commerical software.
+ * <p>
+ * We encourage users to develop software with ANTLR. However,
+ * we do ask that credit is given to us for developing
+ * ANTLR. By "credit", we mean that if you use ANTLR or
+ * incorporate any source code into one of your programs
+ * (commercial product, research project, or otherwise) that
+ * you acknowledge this fact somewhere in the documentation,
+ * research report, etc...
If you like ANTLR and have
+ * developed a nice tool with the output, please mention that
+ * you developed it using ANTLR. In addition, we ask that the
+ * headers remain intact in our source code. As long as these
+ * guidelines are kept, we expect to continue enhancing this
+ * system and expect to make other tools available as they are
+ * completed.
+ * <p>
+ * The ANTLR gang:
+ * @version ANTLR 2.6.0 MageLang Insitute, 1998
+ * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
+ * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
+ * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a>
+ */
+
+#include "antlr/Token.hpp"
+#include "antlr/String.hpp"
+
+ANTLR_BEGIN_NAMESPACE(antlr)
+
+// Shared token of type INVALID_TYPE, constructed with the text "<no text>".
+RefToken Token::badToken(new Token(Token::INVALID_TYPE, "<no text>"));
+
+/** Creates a token of type INVALID_TYPE. */
+Token::Token() : type(INVALID_TYPE)
+{
+}
+
+/** Creates a token of type t. */
+Token::Token(int t) : type(t)
+{
+}
+
+/**
+ * Creates a token of type t and passes txt to setText().
+ * Token::setText() in this file has an empty body, so the text is not
+ * stored by this class.
+ */
+Token::Token(int t, const ANTLR_USE_NAMESPACE(std)string& txt)
+    : type(t)
+{
+    type=t;
+    setText(txt);
+}
+
+/** Always 0 in this class; no column is stored. */
+int Token::getColumn() const
+{
+    return 0;
+}
+
+/** Always 0 in this class; no line is stored. */
+int Token::getLine() const
+{
+    return 0;
+}
+
+/** Always the placeholder "<no text>" in this class; no text is stored. */
+ANTLR_USE_NAMESPACE(std)string Token::getText() const
+{
+    return "<no text>";
+}
+
+/** Returns the token type. */
+int Token::getType() const
+{
+    return type;
+}
+
+// The three setters below do nothing in this class.
+// NOTE(review): presumably subclasses that store position/text override
+// them -- confirm against Token.hpp.
+void Token::setColumn(int c)
+{}
+
+void Token::setLine(int l)
+{}
+
+void Token::setText(const ANTLR_USE_NAMESPACE(std)string& t)
+{}
+
+/** Sets the token type. */
+void Token::setType(int t)
+{
+    type=t;
+}
+
+/** Formats the token as ["<text>",<<type>>] using getText() and the numeric type. */
+ANTLR_USE_NAMESPACE(std)string Token::toString() const
+{
+    return "[\""+getText()+"\",<"+type+">]";
+}
+
+Token::~Token()
+{}
+
+// Default-constructed RefToken.
+RefToken nullToken;
+
+// Out-of-class definitions for the token-type constants, compiled unless
+// NO_STATIC_CONSTS is defined.
+#ifndef NO_STATIC_CONSTS
+const int Token::MIN_USER_TYPE;
+const int Token::NULL_TREE_LOOKAHEAD;
+const int Token::INVALID_TYPE;
+const int Token::EOF_TYPE;
+const int Token::SKIP;
+#endif
+
+ANTLR_END_NAMESPACE
+
diff --git a/poxml/antlr/src/TokenBuffer.cpp b/poxml/antlr/src/TokenBuffer.cpp
new file mode 100644
index 00000000..ded5df9b
--- /dev/null
+++ b/poxml/antlr/src/TokenBuffer.cpp
@@
-0,0 +1,107 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/TokenBuffer.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/**A Stream of Token objects fed to the parser from a TokenStream that can + * be rewound via mark()/rewind() methods. + * <p> + * A dynamic array is used to buffer up all the input tokens. Normally, + * "k" tokens are stored in the buffer. More tokens may be stored during + * guess mode (testing syntactic predicate), or when LT(i>k) is referenced. + * Consumption of tokens is deferred. 
In other words, reading the next + * token is not done by conume(), but deferred until needed by LA or LT. + * <p> + * + * @see antlr.Token + * @see antlr.TokenStream + * @see antlr.TokenQueue + */ + +/** Create a token buffer */ +TokenBuffer::TokenBuffer(TokenStream& input_) +: input(input_) +{ nMarkers=0; markerOffset=0; numToConsume=0; } + +/** Mark another token for deferred consumption */ +void TokenBuffer::consume() +{ numToConsume++; } + +/** Ensure that the token buffer is sufficiently full */ +void TokenBuffer::fill(int amount) +{ + syncConsume(); + // Fill the buffer sufficiently to hold needed tokens + while (queue.entries() < amount + markerOffset) { + // Append the next token + queue.append(input.nextToken()); + } +} + +/** Get a lookahead token value */ +int TokenBuffer::LA(int i) +{ + fill(i); + return queue.elementAt(markerOffset+i-1)->type; +} + +/** Get a lookahead token */ +RefToken TokenBuffer::LT(int i) +{ + fill(i); + return queue.elementAt(markerOffset+i-1); +} + +/**Return an integer marker that can be used to rewind the buffer to + * its current state. + */ +int TokenBuffer::mark() +{ + syncConsume(); + nMarkers++; + return markerOffset; +} + +/**Rewind the token buffer to a marker. + * @param mark Marker returned previously from mark() + */ +void TokenBuffer::rewind(int mark) +{ + syncConsume(); + markerOffset=mark; + nMarkers--; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/TokenStreamBasicFilter.cpp b/poxml/antlr/src/TokenStreamBasicFilter.cpp new file mode 100644 index 00000000..71257f46 --- /dev/null +++ b/poxml/antlr/src/TokenStreamBasicFilter.cpp @@ -0,0 +1,34 @@ +#include "antlr/TokenStreamBasicFilter.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** This object is a TokenStream that passes through all + * tokens except for those that you tell it to discard. + * There is no buffering of the tokens. 
+ */ +TokenStreamBasicFilter::TokenStreamBasicFilter(TokenStream& input_) +: input(&input_) +{ +} + +void TokenStreamBasicFilter::discard(int ttype) +{ + discardMask.add(ttype); +} + +void TokenStreamBasicFilter::discard(const BitSet& mask) +{ + discardMask = mask; +} + +RefToken TokenStreamBasicFilter::nextToken() +{ + RefToken tok = input->nextToken(); + while ( tok && discardMask.member(tok->getType()) ) { + tok = input->nextToken(); + } + return tok; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/TokenStreamHiddenTokenFilter.cpp b/poxml/antlr/src/TokenStreamHiddenTokenFilter.cpp new file mode 100644 index 00000000..827ca382 --- /dev/null +++ b/poxml/antlr/src/TokenStreamHiddenTokenFilter.cpp @@ -0,0 +1,146 @@ +#include "antlr/TokenStreamHiddenTokenFilter.hpp" +#include "antlr/CommonHiddenStreamToken.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/**This object filters a token stream coming from a lexer + * or another TokenStream so that only certain token channels + * get transmitted to the parser. + * + * Any of the channels can be filtered off as "hidden" channels whose + * tokens can be accessed from the parser. 
+ */ + +TokenStreamHiddenTokenFilter::TokenStreamHiddenTokenFilter(TokenStream& input) +: TokenStreamBasicFilter(input) +{ +} + +void TokenStreamHiddenTokenFilter::consume() +{ + nextMonitoredToken = input->nextToken(); +} + +void TokenStreamHiddenTokenFilter::consumeFirst() +{ + consume(); + + // Handle situation where hidden or discarded tokens + // appear first in input stream + RefToken p; + // while hidden or discarded scarf tokens + while ( hideMask.member(LA(1)->getType()) || discardMask.member(LA(1)->getType()) ) { + if ( hideMask.member(LA(1)->getType()) ) { + if ( !p ) { + p = LA(1); + } + else { + static_cast<CommonHiddenStreamToken*>(p.get())->setHiddenAfter(LA(1)); + static_cast<CommonHiddenStreamToken*>(LA(1).get())->setHiddenBefore(p); // double-link + p = LA(1); + } + lastHiddenToken = p; + if (!firstHidden) + firstHidden = p; // record hidden token if first + } + consume(); + } +} + +BitSet TokenStreamHiddenTokenFilter::getDiscardMask() const +{ + return discardMask; +} + +/** Return a ptr to the hidden token appearing immediately after + * token t in the input stream. + */ +RefToken TokenStreamHiddenTokenFilter::getHiddenAfter(RefToken t) +{ + return static_cast<CommonHiddenStreamToken*>(t.get())->getHiddenAfter(); +} + +/** Return a ptr to the hidden token appearing immediately before + * token t in the input stream. + */ +RefToken TokenStreamHiddenTokenFilter::getHiddenBefore(RefToken t) +{ + return static_cast<CommonHiddenStreamToken*>(t.get())->getHiddenBefore(); +} + +BitSet TokenStreamHiddenTokenFilter::getHideMask() const +{ + return hideMask; +} + +/** Return the first hidden token if one appears + * before any monitored token. 
+ */ +RefToken TokenStreamHiddenTokenFilter::getInitialHiddenToken() +{ + return firstHidden; +} + +void TokenStreamHiddenTokenFilter::hide(int m) +{ + hideMask.add(m); +} + +void TokenStreamHiddenTokenFilter::hide(const BitSet& mask) +{ + hideMask = mask; +} + +RefToken TokenStreamHiddenTokenFilter::LA(int i) +{ + return nextMonitoredToken; +} + +/** Return the next monitored token. +* Test the token following the monitored token. +* If following is another monitored token, save it +* for the next invocation of nextToken (like a single +* lookahead token) and return it then. +* If following is unmonitored, nondiscarded (hidden) +* channel token, add it to the monitored token. +* +* Note: EOF must be a monitored Token. +*/ +RefToken TokenStreamHiddenTokenFilter::nextToken() +{ + // handle an initial condition; don't want to get lookahead + // token of this splitter until first call to nextToken + if ( !LA(1) ) { + consumeFirst(); + } + + // we always consume hidden tokens after monitored, thus, + // upon entry LA(1) is a monitored token. + RefToken monitored = LA(1); + // point to hidden tokens found during last invocation + static_cast<CommonHiddenStreamToken*>(monitored.get())->setHiddenBefore(lastHiddenToken); + lastHiddenToken = nullToken; + + // Look for hidden tokens, hook them into list emanating + // from the monitored tokens. 
+ consume(); + RefToken p = monitored; + // while hidden or discarded scarf tokens + while ( hideMask.member(LA(1)->getType()) || discardMask.member(LA(1)->getType()) ) { + if ( hideMask.member(LA(1)->getType()) ) { + // attach the hidden token to the monitored in a chain + // link forwards + static_cast<CommonHiddenStreamToken*>(p.get())->setHiddenAfter(LA(1)); + // link backwards + if (p != monitored) { //hidden cannot point to monitored tokens + static_cast<CommonHiddenStreamToken*>(LA(1).get())->setHiddenBefore(p); + } + p = lastHiddenToken = LA(1); + } + consume(); + } + return monitored; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/TokenStreamSelector.cpp b/poxml/antlr/src/TokenStreamSelector.cpp new file mode 100644 index 00000000..2e6527a8 --- /dev/null +++ b/poxml/antlr/src/TokenStreamSelector.cpp @@ -0,0 +1,97 @@ +#include "antlr/TokenStreamSelector.hpp" +#include "antlr/TokenStreamRetryException.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** A token stream MUX (multiplexor) knows about n token streams + * and can multiplex them onto the same channel for use by token + * stream consumer like a parser. This is a way to have multiple + * lexers break up the same input stream for a single parser. + * Or, you can have multiple instances of the same lexer handle + * multiple input streams; this works great for includes. 
+ */ + +TokenStreamSelector::TokenStreamSelector() +: input(0) +{ +} + +TokenStreamSelector::~TokenStreamSelector() +{ +} + +void TokenStreamSelector::addInputStream(TokenStream* stream, const ANTLR_USE_NAMESPACE(std)string& key) +{ + inputStreamNames[key] = stream; +} + +TokenStream* TokenStreamSelector::getCurrentStream() const +{ + return input; +} + +TokenStream* TokenStreamSelector::getStream(const ANTLR_USE_NAMESPACE(std)string& sname) const +{ + inputStreamNames_coll::const_iterator i = inputStreamNames.find(sname); + if (i == inputStreamNames.end()) { + throw ANTLR_USE_NAMESPACE(std)string("TokenStream ")+sname+" not found"; + } + return (*i).second; +} + +RefToken TokenStreamSelector::nextToken() +{ + // keep looking for a token until you don't + // get a retry exception + for (;;) { + try { + return input->nextToken(); + } + catch (TokenStreamRetryException& r) { + // just retry "forever" + } + } +} + +TokenStream* TokenStreamSelector::pop() +{ + TokenStream* stream = streamStack.top(); + streamStack.pop(); + select(stream); + return stream; +} + +void TokenStreamSelector::push(TokenStream* stream) +{ + streamStack.push(input); + select(stream); +} + +void TokenStreamSelector::push(const ANTLR_USE_NAMESPACE(std)string& sname) +{ + streamStack.push(input); + select(sname); +} + +void TokenStreamSelector::retry() +{ + throw TokenStreamRetryException(); +} + +/** Set the stream without pushing old stream */ +void TokenStreamSelector::select(TokenStream* stream) +{ + input = stream; +} + +void TokenStreamSelector::select(const ANTLR_USE_NAMESPACE(std)string& sname) +{ + inputStreamNames_coll::const_iterator i = inputStreamNames.find(sname); + if (i == inputStreamNames.end()) { + throw ANTLR_USE_NAMESPACE(std)string("TokenStream ")+sname+" not found"; + } + input = (*i).second; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/TreeParser.cpp b/poxml/antlr/src/TreeParser.cpp new file mode 100644 index 00000000..6d302737 --- /dev/null +++ 
b/poxml/antlr/src/TreeParser.cpp @@ -0,0 +1,165 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ +#include "antlr/TreeParser.hpp" +#include "antlr/ASTNULLType.hpp" +#include "antlr/MismatchedTokenException.hpp" +#include <iostream> +#include <stdlib.h> + +ANTLR_BEGIN_NAMESPACE(antlr) +ANTLR_C_USING(exit) + +TreeParser::TreeParser() +: inputState(new TreeParserInputState()), traceDepth(0) +{ +} + +TreeParser::TreeParser(const TreeParserSharedInputState& state) +: inputState(state), traceDepth(0) +{ +} + +TreeParser::~TreeParser() +{ +} + +void TreeParser::setTokenNames(const char** tokenNames_) +{ + while (*tokenNames_) { + tokenNames.push_back(*(tokenNames_++)); + } +} + +/** The AST Null object; the parsing cursor is set to this when + * it is found to be null. This way, we can test the + * token type of a node without having to have tests for null + * everywhere. + */ +RefAST TreeParser::ASTNULL(new ASTNULLType); + +/** Get the AST return value squirreled away in the parser */ +//RefAST getAST() const { +// return returnAST; +//} + +void TreeParser::match(RefAST t, int ttype) +{ + if (!t || t==ASTNULL || t->getType()!=ttype) + throw MismatchedTokenException(); +} + +/**Make sure current lookahead symbol matches the given set + * Throw an exception upon mismatch, which is caught by either the + * error handler or by the syntactic predicate. 
+ */ +void TreeParser::match(RefAST t, const BitSet& b) +{ + if ( !t || t==ASTNULL || !b.member(t->getType()) ) { + throw MismatchedTokenException(); + } +} + +void TreeParser::matchNot(RefAST t, int ttype) +{ + //ANTLR_USE_NAMESPACE(std)cout << "match(" << ttype << "); cursor is " << t.toString() << ANTLR_USE_NAMESPACE(std)endl; + if ( !t || t==ASTNULL || t->getType()==ttype ) { + throw MismatchedTokenException(); + } +} + +void TreeParser::panic() +{ + ANTLR_USE_NAMESPACE(std)cerr << "TreeWalker: panic" << ANTLR_USE_NAMESPACE(std)endl; + exit(1); +} + +/** Parser error-reporting function can be overridden in subclass */ +void TreeParser::reportError(const RecognitionException& ex) +{ + ANTLR_USE_NAMESPACE(std)cerr << ex.toString().c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +/** Parser error-reporting function can be overridden in subclass */ +void TreeParser::reportError(const ANTLR_USE_NAMESPACE(std)string& s) +{ + ANTLR_USE_NAMESPACE(std)cerr << "error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +/** Parser warning-reporting function can be overridden in subclass */ +void TreeParser::reportWarning(const ANTLR_USE_NAMESPACE(std)string& s) +{ + ANTLR_USE_NAMESPACE(std)cerr << "warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +/** Specify an object with support code (shared by + * Parser and TreeParser. Normally, the programmer + * does not play with this, using setASTNodeType instead. 
+ */ +// void TreeParser::setASTFactory(ASTFactory f); + +/** Specify the type of node to create during tree building */ +void TreeParser::setASTNodeFactory(ASTFactory::factory_type factory) +{ + astFactory.setASTNodeFactory(factory); +} + +/** Procedure to write out an indent for traceIn and traceOut */ +void TreeParser::traceIndent() +{ + for( int i = 0; i < traceDepth; i++ ) + ANTLR_USE_NAMESPACE(std)cout << " "; +} + +void TreeParser::traceIn(const ANTLR_USE_NAMESPACE(std)string& rname, RefAST t) +{ + traceDepth++; + traceIndent(); + + ANTLR_USE_NAMESPACE(std)cout << "> " << rname.c_str() + << "(" << (t ? t->toString().c_str() : "null") << ")" + << ((inputState->guessing>0)?" [guessing]":"") + << ANTLR_USE_NAMESPACE(std)endl; +} + +void TreeParser::traceOut(const ANTLR_USE_NAMESPACE(std)string& rname, RefAST t) +{ + traceIndent(); + + ANTLR_USE_NAMESPACE(std)cout << "< " << rname.c_str() + << "(" << (t ? t->toString().c_str() : "null") << ")" + << ((inputState->guessing>0)?" [guessing]":"") + << ANTLR_USE_NAMESPACE(std)endl; + + traceDepth--; +} + +ANTLR_END_NAMESPACE diff --git a/poxml/antlr/src/TreeParserSharedInputState.cpp b/poxml/antlr/src/TreeParserSharedInputState.cpp new file mode 100644 index 00000000..89f1d5dc --- /dev/null +++ b/poxml/antlr/src/TreeParserSharedInputState.cpp @@ -0,0 +1,22 @@ +#include "antlr/TreeParserSharedInputState.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** This object contains the data associated with an + * input AST. Multiple parsers + * share a single TreeParserSharedInputState to parse + * the same tree or to have the parser walk multiple + * trees. 
*/

/** Start outside of any syntactic predicate: guessing is 0. */
TreeParserInputState::TreeParserInputState()
    : guessing(0)
{
}

TreeParserInputState::~TreeParserInputState()
{
}

ANTLR_END_NAMESPACE

diff --git a/poxml/gettext.g b/poxml/gettext.g new file mode 100644 index 00000000..8da92334 --- /dev/null +++ b/poxml/gettext.g @@ -0,0 +1,168 @@

header "pre_include_hpp" {
#include <string>
using namespace std;
#include "parser.h"
}

options {
    language="Cpp";
}

{
#include <iostream>
#include "GettextLexer.hpp"
#include "GettextParser.hpp"
#include "antlr/AST.hpp"
#include "antlr/CommonAST.hpp"

/*
int main()
{
    ANTLR_USING_NAMESPACE(std)
    ANTLR_USING_NAMESPACE(antlr)
    try {
        GettextLexer lexer(cin);
        GettextParser parser(lexer);
        parser.file();

    } catch(exception& e) {
        cerr << "exception: " << e.what() << endl;
    }
}
*/
}

class GettextParser extends Parser;

options {
    codeGenMakeSwitchThreshold = 3;
    codeGenBitsetTestThreshold = 4;
}

// A PO file: zero or more entries, optionally followed by trailing comments.
// The rule recurses on itself; each entry is appended after the list built
// from the rest of the file.
file returns [ MsgList ml ]
{
string c, mi, ms;
MsgBlock mb;
MsgList ml2;
}
    : (comment T_MSGID) => (mb=file_block ml2=file { ml = ml2; ml.append(mb); } )
    | (comment EOF) => c=comment { (void)c; }
    ;

// One entry: leading comments, a msgid, then either a single msgstr or a
// msgid_plural with its indexed msgstr[n] lines. Text is converted from UTF-8.
file_block returns [ MsgBlock mb ]
{
string c, mi, mip, ms;
}
    : c=comment mi=msgid
      (
        ( ms=msgstr {
            mb.comment = QString::fromUtf8(c.c_str());
            mb.msgid = QString::fromUtf8(mi.c_str());
            mb.msgstr = QString::fromUtf8(ms.c_str());
          }
        )
      |
        ( mip=msgid_plural ms=msgstr_plural {
            mb.comment = QString::fromUtf8(c.c_str());
            mb.msgid = QString::fromUtf8(mi.c_str());
            mb.msgid_plural = QString::fromUtf8(mip.c_str());
            mb.msgstr = QString::fromUtf8(ms.c_str());
          }
        )
      )
    ;

// Concatenates the text of all consecutive T_COMMENT tokens (may be empty).
comment returns [string s]
{
string r;
}
    : (c:T_COMMENT r=comment { s = c->getText() + r; } )
    | /* nothing */
    ;

msgid returns [string s]
    : T_MSGID t:T_STRING { s = t->getText(); }
    ;

msgid_plural returns [string s]
    : T_MSGID_PLURAL t:T_STRING { s = t->getText(); }
    ;

msgstr returns [string s]
    : T_MSGSTR t:T_STRING { s = t->getText(); }
    ;

// One or more "msgstr[n] ..." lines. s is overwritten on every iteration,
// so only the text of the last form is returned; the index n is not used.
msgstr_plural returns [string s]
    : (
        T_MSGSTR L_BRACKET n:T_INT R_BRACKET t:T_STRING { s = t->getText(); }
      )+
    ;

class GettextLexer extends Lexer;
options {
    charVocabulary = '\u0000'..'\u00FF';
    testLiterals=false; // don't automatically test for literals
}

// Whitespace is matched and then skipped; newline() is called for "\n" and "\r\n".
WS
    : (' ' | '\t'
      | ('\n' | "\r\n") { newline(); }
      ) { $setType(ANTLR_USE_NAMESPACE(antlr)Token::SKIP); }
    ;

L_BRACKET: '[' ;

R_BRACKET: ']' ;

T_INT : ( '0'..'9' )+
    ;

// A '#' up to, but not including, the end of the line.
T_COMMENT : '#' (~'\n')*
    ;

// "msgid", "msgid_plural" and "msgstr" share the prefix "msg"; the token
// type is chosen once the rest of the keyword has been matched.
MSG_TAG : "msg" ( ("id") (
            "" { $setType(T_MSGID); }
          | "_plural" { $setType(T_MSGID_PLURAL); }
          )
        | "str" { $setType(T_MSGSTR); }
        )
    ;

// One or more quoted segments on consecutive lines, joined into one token.
// The '!' suffix keeps the quotes, the line break and surrounding blanks out
// of the token text. Each closing quote must be followed by a newline,
// optionally preceded by blanks.
// Fixed: the blanks after the closing quote were ' ' | 't' (the letter t),
// a typo for the tab character '\t' used by the sibling loop at the end of
// the rule; a trailing tab after a string could not be lexed.
// GettextLexer.cpp is generated from this file and must be regenerated.
T_STRING
    : ('"'! (ESC|~'"')* ('"'! (' ' | '\t')*! '\n'! { newline(); } (' '! | '\t'!)*))+
    ;

// copied from example
protected
ESC : '\\'
      ( 'n'
      | 'r'
      | 't'
      | 'b'
      | 'f'
      | '"'
      | '\''
      | '\\'
      | ('0'..'3')
        (
          options {
            warnWhenFollowAmbig = false;
          }
        : ('0'..'9')
          (
            options {
              warnWhenFollowAmbig = false;
            }
          : '0'..'9'
          )?
        )?
      | ('4'..'7')
        (
          options {
            warnWhenFollowAmbig = false;
          }
        : ('0'..'9')
        )?
      )
    ;
diff --git a/poxml/lauri.po b/poxml/lauri.po new file mode 100644 index 00000000..9dd0d025 --- /dev/null +++ b/poxml/lauri.po @@ -0,0 +1,442 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR Free Software Foundation, Inc. +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2001-02-09 01:25+0100\n" +"PO-Revision-Date: 2001-07-02 20:31MET\n" +"Last-Translator: Stephan Kulow <coolo@kde.org>\n" +"Language-Team: german <kde-i18n-de@kde.org>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"X-Generator: KBabel 0.9.2\n" + +#. Tag: title +#: lauri.xml:16 +#, no-c-format +msgid "test document" +msgstr "Testtext" + +#. 
Tag: author +#: lauri.xml:17 +#, no-c-format +msgid "<firstname>Stephan</firstname><surname>Kulow</surname>" +msgstr "<firstname>Stephan</firstname><surname>Kulow</surname>" + +#. Tag: para +#: lauri.xml:19 +#, no-c-format +msgid "This is nonsense" +msgstr "Dies ist Schwachsinn" + +#. Tag: keyword +#: lauri.xml:20 +#, no-c-format +msgid "<keyword>KDE</keyword>" +msgstr "<keyword>KDE.de</keyword>" + +#. Tag: title +#: lauri.xml:25 +#, no-c-format +msgid ""Text" for &lauri;" +msgstr ""Text" für &lauri;" + +#. Tag: title +#: lauri.xml:27 +#, no-c-format +msgid "Section 1" +msgstr "Abschnitt 1" + +#. Tag: title +#: lauri.xml:29 +#, no-c-format +msgid "Section 1.1" +msgstr "Abschnitt 1.1" + +#. Tag: para +#: lauri.xml:31 +#, no-c-format +msgid "" +"<emphasis><emphasis role=\"blah\">Warum kann ich meinem Rechner nicht " +"einfach ausschalten?</emphasis></emphasis>Hier noch" +msgstr "" +"<emphasis><emphasis role=\"blah\">Warum kann ich meinem Rechner nicht " +"einfach ausschalten?</emphasis></emphasis>Hier noch" + +#. Tag: para +#: lauri.xml:34 +#, no-c-format +msgid "<application>me</application> can't be turned off." +msgstr "Das <application>me</application> kann nicht ausgemacht werden." + +#. Tag: para +#: lauri.xml:36 +#, no-c-format +msgid "Leading "Text" for &lauri;" +msgstr ""Starttext" für &lauri;" + +#. Tag: para +#: lauri.xml:39 +#, no-c-format +msgid "" +"we pretend her name was Höpfner, but that <anchor id=\"help\"/> is a good " +"name too" +msgstr "" +"sagen wir mal sie heisst Höpfner, aber <anchor id=\"help\"/> ist auch gut" + +#. Tag: para +#: lauri.xml:39 +#, no-c-format +msgid "<keycap> Shift </keycap> <keycap> help </keycap>" +msgstr "<keycap> Schieber </keycap> <keycap> Hilfe </keycap>" + +#. Tag: primary +#: lauri.xml:45 +#, no-c-format +msgid "<primary>kde</primary>" +msgstr "<primary>kde.de</primary>" + +#. Tag: para +#: lauri.xml:46 +#, no-c-format +msgid "" +"an archive of the developer's mailing list is at <ulink url=\"http://lists." 
+"kde.org/?l=kde-kmail&r=1&w=2\">lists.kde.org</ulink>." +msgstr "" +"Es gibt ein Archiv <ulink url=\"http://lists.kde.org/?l=kde-kmail&" +"r=1&w=2\">lists.kde.org</ulink>." + +#. Tag: trans_comment +#: lauri.xml:51 +#, no-c-format +msgid "GIVE_ME_CREDIT" +msgstr "<para>Habe ich gemacht - toll ne?</para>" + +#. Tag: term +#: lauri.xml:56 +#, no-c-format +msgid "Text 1" +msgstr "Schrift 1" + +#. Tag: para +#: lauri.xml:60 +#, no-c-format +msgid "Text 2" +msgstr "Schrift 2" + +#. Tag: para +#: lauri.xml:63 +#, no-c-format +msgid "Text 3 \"everything\"" +msgstr "Text 3 \"alles\"" + +#. Tag: term +#: lauri.xml:65 +#, no-c-format +msgid "Everything" +msgstr "Alles" + +#. Tag: para +#: lauri.xml:65 +#, no-c-format +msgid "Is correct" +msgstr "Ist klar" + +#. Tag: term +#: lauri.xml:66 +#, no-c-format +msgid "Nothing" +msgstr "Nichts" + +#. Tag: para +#: lauri.xml:66 +#, no-c-format +msgid "Is wrong" +msgstr "ist falsch" + +#. Tag: para +#: lauri.xml:68 +#, no-c-format +msgid "Text 4 \\\"even more\\\"" +msgstr "Text 4 \\\"noch mehr\\\"" + +#. Tag: para +#: lauri.xml:73 +#, no-c-format +msgid "Text 4 \\\"even less\\\"" +msgstr "Text 4 \\\"noch weniger\\\"" + +#. Tag: menuchoice +#: lauri.xml:85 +#, no-c-format +msgid "" +"<shortcut><keycombo><keycap>Ctrl</keycap><keycap>N</keycap></keycombo></" +"shortcut> <guimenu><accel>F</accel>ile</guimenu><guimenuitem><accel>N</" +"accel>ew</guimenuitem>" +msgstr "" +"<shortcut><keycombo><keycap>Ctrl</keycap><keycap>N</keycap></keycombo></" +"shortcut> <guimenu><accel>D</accel>atei</guimenu><guimenuitem><accel>N</" +"accel>eu</guimenuitem>" + +#. Tag: action +#: lauri.xml:88 +#, no-c-format +msgid "This starts a new Document in a new instance of the editor." +msgstr "Dies macht alles neu." + +#. Tag: title +#: lauri.xml:96 +#, no-c-format +msgid "What XML looks like" +msgstr "Wie XML aussieht" + +#. 
Tag: para +#: lauri.xml:98 +#, no-c-format +msgid "" +"Here is an example of an XML file used by <application>Columbo</application> " +"to describe a search site on the Internet:" +msgstr "Dies ist <application>Columbo</application>:" + +#. Tag: programlisting +#: lauri.xml:110 +#, no-c-format +msgid "" +"<![CDATA[<!DOCTYPE search>\n" +"<search \n" +" name=\"Altavista\" \n" +" channel=\"web\"\n" +" method=\"get\"\n" +" action=\"http://www.altavista.com/cgi-bin/query\"\n" +">\n" +"\n" +" <input name=\"pg\" value=\"q\"/>\n" +" <input name=\"sc\" value=\"on\"/>\n" +" <input name=\"hl\" value=\"on\"/>\n" +" <input name=\"kl\" value=\"XX\"/>\n" +" <input name=\"stype\" value=\"stext\"/>\n" +" <input name=\"q\" user=\"true\"/>\n" +"\n" +" <interpret\n" +" resultListStart=\"<dl>\"\n" +" resultItemStart=\"<dt>\"\n" +" relevanceStart=\"\"\n" +" resultListEnd=\"</td>\"\n" +" resultItemEnd=\"</dl>\"\n" +" relevanceEnd=\"\"\n" +" />\n" +"</search>]]>" +msgstr "" +"\n" +"<![CDATA[<!DOCTYPE search>\n" +"<search \n" +" name=\"Altawista\" \n" +" channel=\"Netz\"\n" +" method=\"krieg\"\n" +" action=\"http://www.altawista.com/cgi-bin/query\"\n" +">\n" +"\n" +" <input name=\"pg\" value=\"q\"/>\n" +" <input name=\"sc\" value=\"on\"/>\n" +" <input name=\"hl\" value=\"on\"/>\n" +" <input name=\"kl\" value=\"XX\"/>\n" +" <input name=\"stype\" value=\"stext\"/>\n" +" <input name=\"q\" user=\"wahr\"/>\n" +"\n" +" <interpret\n" +" resultListAnfang=\"<dl>\"\n" +" resultItemAnfang=\"<dt>\"\n" +" relevanceAnfang=\"\"\n" +" resultListEnde=\"</td>\"\n" +" resultItemEnd=\"</dl>\"\n" +" relevanceEnd=\"\"\n" +" />\n" +"</search>]]>" + +#. Tag: para +#: lauri.xml:113 +#, no-c-format +msgid "" +"This instruction is normally used to declare the DTD of the document. Here " +"no DTD is used, and only the name of the root element (<varname>search</" +"varname>) appears." +msgstr "Das ist normalerweise DTD" + +#. 
Tag: para +#: lauri.xml:120 +#, no-c-format +msgid "" +"<sgmltag class=\"starttag\">search</sgmltag> begins the root element. Here, " +"it extends to the end of the document (<sgmltag class=\"endtag\">search</" +"sgmltag>)." +msgstr "" +"<sgmltag class=\"starttag\">search</sgmltag> begins the root element. Here, " +"it extends to the end of the document (<sgmltag class=\"endtag\">search</" +"sgmltag>)." + +#. Tag: para +#: lauri.xml:127 +#, no-c-format +msgid "" +"This is an example of an empty element. Empty elements do not need a closing " +"tag (which would be <varname></input></varname> in this case)." +msgstr "" +"This is an example of an empty element. Empty elements do not need a closing " +"tag (which would be <varname></input></varname> in this case)." + +#. Tag: title +#: lauri.xml:139 +#, no-c-format +msgid "The ugly part" +msgstr "Der schlimme Teil" + +#. Tag: para +#: lauri.xml:140 +#, no-c-format +msgid "Ending Text:" +msgstr "Ende:" + +#. Tag: literallayout +#: lauri.xml:143 +#, no-c-format +msgid "" +"Matthias Hoelzer\n" +"KDE-Verein i.G.\n" +"Account-Nr. 2798296\n" +"\n" +"Staedtische Sparkasse Wuerzburg\n" +"Hofstrasse 9\n" +"97070 Wuerzburg\n" +"Germany\n" +"BLZ 790 500 00\n" +"SWIFT-Address: BYLA DE 77\n" +"\n" +"print \"$b4 /path/to/KDE/libs/libpng.a $af\\n\"; \\\n" +"you see it here\n" +"whereever" +msgstr "" +"\n" +"Matthias Hoelzer\n" +"KDE-Verein i.G.\n" +"Account-Nr. 2798296\n" +"\n" +"FrankenSWIFT-Address: BYLA DE 77\n" +"hallo Du" + +#. 
Tag: screen +#: lauri.xml:146 +#, no-c-format +msgid "" +"Expect ogin: <lineannotation># remember, we do ordinary " +"terminal login</lineannotation>\n" +"ID \"\" <lineannotation># kppp sends the id you " +"configured in the main dialog</lineannotation>\n" +"Expect for userxyz: <lineannotation># a list of available numbers " +"is shown, the user should choose one</lineannotation> \n" +"Send userxyz-home <lineannotation># the user wants to be called " +"back on their home number</lineannotation>\n" +"Expect ogin: <lineannotation># The callback process is now " +"running, a new connection, and so a new login.</lineannotation>\n" +"ID\n" +"Expect assword: <lineannotation># Now send your password</" +"lineannotation>\n" +"Expect > <lineannotation># Wait for the command " +"prompt (the prompt may vary)</lineannotation>\n" +"Send start_ppp <lineannotation># this command starts the pppd</" +"lineannotation>" +msgstr "" +"\n" +"Expect ogin: <lineannotation># Dies ist alles nicht so " +"schlimm!</lineannotation>\n" +"ID \"\" <lineannotation># kppp sends the id you " +"configured in the main dialog</lineannotation>\n" +"Expect for userxyz: <lineannotation># a list of available numbers " +"is shown, the user should choose one</lineannotation> \n" +"Send userxyz-home <lineannotation># the user wants to be called " +"back on their home number</lineannotation>\n" +"Expect ogin: <lineannotation># The callback process is now " +"running, a new connection, and so a new login.</lineannotation>\n" +"ID\n" +"Expect assword: <lineannotation># Now send your password</" +"lineannotation>\n" +"Expect > <lineannotation># Wait for the command " +"prompt (the prompt may vary)</lineannotation>\n" +"Send start_ppp <lineannotation># this command starts the pppd</" +"lineannotation>" + +#. 
Tag: screen +#: lauri.xml:150 +#, no-c-format +msgid "" +"Send <lineannotation># send an empty string</" +"lineannotation>\n" +"Expect ID:\n" +"Send itsme\n" +"Expect word:\n" +"Send forgot\n" +"Expect granted\n" +"Send ppp" +msgstr "" +"\n" +"Send <lineannotation># send einen leerenstring</" +"lineannotation>\n" +"Expect ID:\n" +"Send ichbins\n" +"Expect word:\n" +"Send forgot\n" +"Expect granted\n" +"Send ppp" + +#. Tag: programlisting +#: lauri.xml:152 +#, no-c-format +msgid "" +"-> #include <qpixmap.h>\n" +"-> #include <qpen.h>\n" +"\n" +" class KScribbleDoc\n" +" {\n" +"\n" +"-> protected:\n" +"\n" +"-> QPen currentPen(){ return pen;}; \n" +" \n" +"-> int penWidth()\n" +"-> { return pen.width(); }\n" +"\n" +" public slots:\n" +" void updateAllViews(KScribbleView *sender);\n" +" \n" +" protected:\n" +" \n" +"-> QPixmap buffer;\n" +" \n" +" private:\n" +"-> QPen pen;\n" +" /** the modified flag of the current document */\n" +" bool modified;" +msgstr "" +"\n" +"-> #include <qpixmap.h>\n" +"-> #include <qpen.h>\n" +" class KScribbleDoc\n" +" {\n" +"-> protected:\n" +"-> QPen currentPen(){ return pen;}; \n" +" \n" +"-> int penWidth()\n" +"-> { return pen.width(); }\n" +" public slots:\n" +" void updateAllViews(KScribbleView *sender);\n" +" \n" +" protected:\n" +" \n" +"-> QPixmap buffer;\n" +" \n" +" private:\n" +"-> QPen pen;\n" +" /** das veraenderte flag of the current document */\n" +" bool modified;" diff --git a/poxml/lauri.xml b/poxml/lauri.xml new file mode 100644 index 00000000..27715981 --- /dev/null +++ b/poxml/lauri.xml @@ -0,0 +1,241 @@ +<?xml version="1.0" ?> +<!DOCTYPE book PUBLIC "-//KDE//DTD DocBook XML V4.2-Based Variant V1.1//EN" "dtd/kdex.dtd" [ + <!ENTITY % English "INCLUDE" > <!-- change language only here --> + <!ENTITY lauri "<emphasis>Lauri</emphasis>" > +]> + +<book> + + +<bookinfo> +<date>06/08/2000</date> +<releaseinfo>0.05.00</releaseinfo> + +<!-- *********************** Test **************** --> + +<title>test document</title> 
+<author><firstname>Stephan</firstname><surname>Kulow</surname></author> + +<abstract><para>This is nonsense</para></abstract> +<keywordset><keyword>KDE</keyword></keywordset> + +</bookinfo> + +<chapter id="hello"> +<title>"Text" for &lauri;</title> +<sect1> +<title>Section 1</title> +<sect2> +<title>Section 1.1</title> + <anchor id="try"/> + <para><emphasis><emphasis role="blah">Warum kann ich + meinem Rechner nicht einfach + ausschalten?</emphasis></emphasis>Hier noch</para> + <para><application>me</application> can't be turned off.</para> +<para> + <note><para> + Leading "Text" for &lauri; + </para></note> + <caution><para>we pretend her name was Höpfner, but that <anchor id="help"/> is a good name too</para></caution> + <keycap> + Shift + </keycap> + <keycap> + help + </keycap><indexterm><primary>kde</primary></indexterm> + <tip><para>an archive of the +developer's mailing list is at <ulink +url="http://lists.kde.org/?l=kde-kmail&r=1&w=2">lists.kde.org</ulink>. + </para></tip> +</para> +<!-- TRANS:GIVE_ME_CREDIT --> + +<para> + <variablelist> + <varlistentry> + <term> + Text 1 + </term> + <listitem> + <para id="blah"> + Text 2 + <note> + <para> + Text 3 "everything" + <variablelist><varlistentry><term>Everything</term><listitem><para>Is correct</para></listitem></varlistentry></variablelist> + <variablelist><varlistentry><term>Nothing</term><listitem><para>Is wrong</para></listitem></varlistentry></variablelist> + <footnote> + <para> + Text 4 \"even more\" + </para> + <para> + <note> + <para> + Text 4 \"even less\" + </para> + </note> + </para> + </footnote> + </para> + </note> + </para> + </listitem> + </varlistentry> + <varlistentry> + <term><anchor id="new"/><menuchoice> +<shortcut><keycombo><keycap>Ctrl</keycap><keycap>N</keycap></keycombo></shortcut> +<guimenu><accel>F</accel>ile</guimenu><guimenuitem><accel>N</accel>ew</guimenuitem></menuchoice></term> + <listitem><para><action>This starts a new Document in a new instance of the +editor.</action></para> 
+</listitem> + </varlistentry> + </variablelist> +</para> +</sect2> + <sect2 id="xml-format"> + <title>What XML looks like</title> + + <para> + Here is an example of an XML file used + by <application>Columbo</application> to describe a search site on the + Internet: + </para> + <para> + <programlistingco> + <areaspec> + <area id="xml.doctype" coords="1"/> + <area id="xml.rootel" coords="2"/> + <area id="xml.nestedel" coords="9"/> + </areaspec> + <programlisting> +<![CDATA[<!DOCTYPE search> +<search + name="Altavista" + channel="web" + method="get" + action="http://www.altavista.com/cgi-bin/query" +> + + <input name="pg" value="q"/> + <input name="sc" value="on"/> + <input name="hl" value="on"/> + <input name="kl" value="XX"/> + <input name="stype" value="stext"/> + <input name="q" user="true"/> + + <interpret + resultListStart="<dl>" + resultItemStart="<dt>" + relevanceStart="" + resultListEnd="</td>" + resultItemEnd="</dl>" + relevanceEnd="" + /> +</search>]]> + </programlisting> + <calloutlist> + <callout arearefs="xml.doctype"> + <para> + This instruction is normally used to declare the DTD of the + document. Here no DTD is used, and only the name of the root + element (<varname>search</varname>) appears. + </para> + </callout> + <callout arearefs="xml.rootel"> + <para> + <sgmltag class="starttag">search</sgmltag> begins the root + element. Here, it extends to the end of the document + (<sgmltag class="endtag">search</sgmltag>). + </para> + </callout> + <callout arearefs="xml.nestedel"> + <para> + This is an example of an empty element. Empty elements do not + need a closing tag (which would be + <varname></input></varname> in this case). + </para> + </callout> + </calloutlist> + </programlistingco> + </para> + + </sect2> +<sect2> +<title>The ugly part</title> +<para> + Ending Text: + +<literallayout> +Matthias Hoelzer +KDE-Verein i.G. +Account-Nr. 
2798296 + +Staedtische Sparkasse Wuerzburg +Hofstrasse 9 +97070 Wuerzburg +Germany +BLZ 790 500 00 +SWIFT-Address: BYLA DE 77 + +print "$b4 /path/to/KDE/libs/libpng.a $af\n"; \ +you see it here +whereever +</literallayout> +</para> +<para> +<screen> +Expect ogin: <lineannotation># remember, we do ordinary terminal login</lineannotation> +ID "" <lineannotation># kppp sends the id you configured in the main dialog</lineannotation> +Expect for userxyz: <lineannotation># a list of available numbers is shown, the user should choose one</lineannotation> +Send userxyz-home <lineannotation># the user wants to be called back on their home number</lineannotation> +Expect ogin: <lineannotation># The callback process is now running, a new connection, and so a new login.</lineannotation> +ID +Expect assword: <lineannotation># Now send your password</lineannotation> +Expect > <lineannotation># Wait for the command prompt (the prompt may vary)</lineannotation> +Send start_ppp <lineannotation># this command starts the pppd</lineannotation> +</screen> +</para> + +<para> +<screen> +Send <lineannotation># send an empty string</lineannotation> +Expect ID: +Send itsme +Expect word: +Send forgot +Expect granted +Send ppp +</screen> + +<programlisting> +-> #include <qpixmap.h> +-> #include <qpen.h> + + class KScribbleDoc + { + +-> protected: + +-> QPen currentPen(){ return pen;}; + +-> int penWidth() +-> { return pen.width(); } + + public slots: + void updateAllViews(KScribbleView *sender); + + protected: + +-> QPixmap buffer; + + private: +-> QPen pen; + /** the modified flag of the current document */ + bool modified; +</programlisting> +</para> +</sect2> +</sect1> +</chapter> +</book> + diff --git a/poxml/parser.cpp b/poxml/parser.cpp new file mode 100644 index 00000000..c34976bf --- /dev/null +++ b/poxml/parser.cpp @@ -0,0 +1,1008 @@ +// #define POXML_DEBUG + +#include "parser.h" +#include <iostream> +#include <stdlib.h> +#include <assert.h> +#include <qregexp.h> + +using namespace 
std; + +static const char *singletags[] = {"beginpage","imagedata", "colspec", "spanspec", + "anchor", "xref", "area", + "footnoteref", "void", "inlinegraphic", + "glosssee", "graphic", "xi:include", + 0}; +static const char *cuttingtags[] = {"bridgehead", "trans_comment", "para", "title", "term", + "entry", "contrib", "keyword", "example", + "note", "footnote", "caution", + "informalexample", "remark", "comment", + "imageobject", "varlistentry", "thead", + "tbody", "tgroup", "row", "screenshot", "screeninfo", + "variablelist", "step", "procedure", + "step", "holder", "listitem", "important", + "author", "itemizedlist", "orderedlist", + "caption", "textobject", "mediaobject", + "tip", "glossdef", "inlinemediaobject", + "simplelist", "member", "glossentry", + "areaspec", "corpauthor", "indexterm", + "calloutlist", "callout", "subtitle", + "table", "part", "xi:fallback", "primary", + "secondary", "chapter", "sect1", "sect2", + "figure", "abstract", "sect3", "sect", "sect4", + "warning", "preface", "authorgroup", "keywordset", + "informaltable", "qandaentry", "question", "answer", + "othercredit", "affiliation", "qandaset", + "cmdsynopsis", "funcsynopsis", "funcsynopsisinfo" , + "epigraph", "attribution", "glossary", "chapterinfo", + "glossdiv", "blockingquote", "simplesect", "section", + "qandadiv", "refsect1", "refmeta", "formalpara", + "refentry", "refnamediv", "refpurpose", "refentrytitle", + "refmiscinfo", "refsect2", "refsect3", "refsect1info", + "refsect2info", "refsect3info", "refsection", "refsectioninfo", + "refsynopsisdiv", "refsysnopsisdivinfo", "remark", + "revdescription", "glossentry", "partinfo", + "segmentedlist", "segtitle", "seg", "seglistitem", "screenco", + 0}; +static const char *literaltags[] = {"literallayout", "synopsis", "screen", + "programlisting", 0}; + +bool StructureParser::fatalError ( const QXmlParseException &e ) +{ + cerr << "fatalError " << e.message().latin1() << " " << e.lineNumber() << " " + << e.columnNumber() << endl; + return 
false; +} + +bool StructureParser::startDocument() +{ + infos_reg = QRegExp("\\s*poxml_line=\"(\\d+)\" poxml_col=\"(\\d+)\""); + do_not_split_reg = QRegExp("\\s*condition=\"do-not-split\""); + message = ""; + inside = 0; + return true; +} + +bool StructureParser::isCuttingTag(const QString &qName) +{ + int index = 0; + while (cuttingtags[index]) { + if (cuttingtags[index] == qName) + return true; + index++; + } + return isLiteralTag(qName); +} + +bool StructureParser::isSingleTag(const QString &qName) +{ + int index = 0; + while (singletags[index]) { + if (singletags[index] == qName) + return true; + index++; + } + return false; +} + +bool StructureParser::isLiteralTag(const QString &qName) +{ + int index = 0; + while (literaltags[index]) { + if (literaltags[index] == qName) + return true; + index++; + } + return false; +} + +bool StructureParser::skippedEntity ( const QString & name ) +{ + if (inside) + message += QString("&%1;").arg(name); + return true; +} + +bool StructureParser::startElement( const QString& , const QString& , + const QString& qName, + const QXmlAttributes & attr ) +{ + QString tname = qName.lower(); + + bool first = false; + + if (isCuttingTag(tname)) { + if (!inside) { + message = QString::null; + list.pc.increasePara(); + startline = locator->lineNumber(); + startcol = locator->columnNumber(); + first = true; + } + inside++; + } + + if (inside) + { + QString tmp = "<" + tname; + for (int i = 0; i < attr.length(); i++) { + tmp += QString(" %1=\"%2\"").arg(attr.qName(i)).arg(attr.value(i)); + } + tmp += QString(" poxml_line=\"%1\"").arg(locator->lineNumber()); + tmp += QString(" poxml_col=\"%1\"").arg(locator->columnNumber()); + + if (isSingleTag(qName)) + tmp += "/>"; + else + tmp += ">"; + message += tmp; + if (first) + startcol -= message.length(); + } + + if (tname == "anchor" || tname.left(4) == "sect" || tname == "chapter") + if (!attr.value("id").isEmpty()) list.pc.addAnchor(attr.value("id")); + + return true; +} + +bool 
StructureParser::startCDATA() +{ + if ( inside ) + message += "<![CDATA["; + return true; +} + +bool StructureParser::endCDATA() +{ + if ( inside ) + message += "]]>"; + return true; +} + +bool StructureParser::isClosure(const QString &message) +{ + assert(message.at(0) == '<'); + int endindex = 1; + while (!message.at(endindex).isSpace() && message.at(endindex) != '>') + endindex++; + QString tag = message.mid(1, endindex - 1); + return closureTag(message, tag); +} + +bool StructureParser::closureTag(const QString& message, const QString &tag) +{ +#ifdef POXML_DEBUG + qDebug("closureTag %s %s", message.latin1(), tag.latin1()); +#endif + + int inside = 0; + uint index = 0; + while (true) + { + int nextclose = message.find(QRegExp(QString::fromLatin1("</%1[\\s>]").arg(tag)), index); + int nextstart = message.find(QRegExp(QString::fromLatin1("<%1[>\\s]").arg(tag)), index); + // qDebug("finding %d %d %d %d", nextstart, nextclose, index, inside); + if (nextclose == -1) { +#ifdef POXML_DEBUG + qDebug("ending on no close anymore %d %d %d %d", (!inside && index >= message.length()), inside, index, message.length()); +#endif + return !inside && index >= message.length(); + } + if (nextstart == -1) + nextstart = message.length() + 1; + + if (nextstart < nextclose) { + inside++; + index = nextstart + 1; + while (message.at(index) != '>') + index++; + index++; + } else { + inside--; + index = nextclose + 1; + while (message.at(index) != '>') + index++; + index++; + if (!inside) { +#ifdef POXML_DEBUG + qDebug("ending on exit %d", index >= message.length()); +#endif + return index >= message.length(); + } + } + } +} + +void StructureParser::descape(QString &message) +{ + uint index = 0; + stripWhiteSpace( message ); + + int inside = 0; + bool lastws = false; + + while (index < message.length()) { + switch (message.at(index).latin1()) { + case '\n': + case '\t': + case '\r': + if (!inside) + message[index] = ' '; + case ' ': + if (!inside && lastws) + message[index] = '\010'; + 
lastws = true; + break; + case '<': { + uint endindex = index+1; + while (endindex < message.length() && !message.at(endindex).isSpace() && + message.at(endindex) != '>') + endindex++; + QString tag = message.mid(index + 1, endindex - index - 1); + if (tag.at(0) == '/') { + if (isLiteralTag(tag.mid(1))) + inside--; + } else + if (isLiteralTag(tag)) + inside++; + break; + } + default: + lastws = false; + } + + index++; + } + message.replace(QRegExp("\010"), ""); +} + +bool StructureParser::formatMessage(MsgBlock &msg) const +{ +#ifdef POXML_DEBUG + qDebug("formatMessage %s", msg.msgid.latin1()); +#endif + + int offset = 0; + bool changed = false; + bool recurse = true; + + if (msg.msgid.isEmpty()) + return true; + + for (int index = 0; msg.msgid.at(index) == ' '; index++, offset++); + stripWhiteSpace( msg.msgid ); + + // removing starting single tags + for (int index = 0; singletags[index]; index++) + { + int slen = strlen(singletags[index]); + + if (msg.msgid.left(slen + 1) == QString::fromLatin1("<%1").arg(singletags[index]) && + !msg.msgid.at( slen + 1 ).isLetterOrNumber() ) + { +#ifdef POXML_DEBUG + qDebug("removing single tag %s", singletags[index]); +#endif + int strindex = strlen(singletags[index]) + 1; + while (msg.msgid.at(strindex) != '>') + strindex++; + msg.msgid = msg.msgid.mid(strindex + 1); + changed = true; + offset += strindex + 1; + for (int index = 0; msg.msgid.at(index) == ' '; index++, offset++) ; + stripWhiteSpace( msg.msgid ); + } + } + + while (msg.msgid.right(2) == "/>") + { + int strindex = msg.msgid.length() - 2; + while (msg.msgid.at(strindex) != '<') + strindex--; + msg.msgid = msg.msgid.left(strindex); + stripWhiteSpace( msg.msgid ); // only removed space at the end + changed = true; + } + + for (int index = 0; msg.msgid.at(index) == ' '; index++, offset++) ; + stripWhiteSpace( msg.msgid ); + + while (true) { + if (msg.msgid.at(0) != '<') + break; + if (msg.msgid.at(msg.msgid.length() - 1) != '>') + break; + int strindex = 1; + while 
(msg.msgid.at(strindex) != ' ' && msg.msgid.at(strindex) != '>') + strindex++; + QString starttag = msg.msgid.mid(1, strindex - 1); + int endindex = msg.msgid.length() - 2; + while (msg.msgid.at(endindex) != '<' && msg.msgid.at(endindex + 1) != '/') + endindex--; +#ifdef POXML_DEBUG + qDebug("endIndex %d", endindex); +#endif + strindex = endindex; + QString orig = msg.msgid; + + QString endtag = msg.msgid.mid(endindex + 2, msg.msgid.length() - (endindex + 2) - 1); + QString endtag_attr = endtag.mid(endtag.find(' '), endtag.length()); + endtag.replace(infos_reg, ""); + if (endtag == starttag) { + if (!closureTag(msg.msgid, starttag)) + break; + + // removing start/end tags + msg.msgid = msg.msgid.left(endindex); + strindex = 0; + while (msg.msgid.at(strindex) != '>') + strindex++; + QString attr = msg.msgid.left(strindex); + msg.msgid = msg.msgid.mid(strindex + 1); + offset += strindex + 1; + for (int index = 0; msg.msgid.at(index) == ' '; index++, offset++) ; + stripWhiteSpace( msg.msgid ); + msg.tag = starttag; + + if (infos_reg.search(attr) >= 0) { + msg.lines.first().start_line = infos_reg.cap(1).toInt(); + msg.lines.first().start_col = infos_reg.cap(2).toInt(); +#ifdef POXML_DEBUG + qDebug("col %s %s %d", attr.latin1(), msg.msgid.latin1(), msg.lines.first().start_col); +#endif + offset = 0; + + if (infos_reg.search(endtag_attr) >= 0) { + msg.lines.first().end_line = infos_reg.cap(1).toInt(); + msg.lines.first().end_col = infos_reg.cap(2).toInt() + 1; + } + } + if (do_not_split_reg.search(attr) >= 0) { + msg.do_not_split = true; + break; + } + + changed = true; + } else + break; + } + +#ifdef POXML_DEBUG + qDebug("formatMessage result %s %d %d", msg.msgid.latin1(), changed && recurse, msg.lines.first().start_col); +#endif + + msg.lines.first().offset += offset; + if (msg.do_not_split) + recurse = false; + + if (changed && recurse) + formatMessage(msg); + + return !recurse; // indicates an abort +} + +MsgList StructureParser::splitMessage(const MsgBlock &mb) +{ + 
MsgList result; + + MsgBlock msg1 = mb; + MsgBlock msg2 = mb; + + QString message = mb.msgid; + +#ifdef POXML_DEBUG + qDebug("splitMessage %s", message.latin1()); +#endif + + if (message.at(0) == '<') { + int endindex = 1; + while (!message.at(endindex).isSpace() && message.at(endindex) != '>') + endindex++; + QString tag = message.mid(1, endindex - 1); + + if (closureTag(message, tag)) + goto error; + + if (isCuttingTag(tag)) + { + // if the message starts with a cutting tag, this tag has to + // end in between. We split both messages and format them + int strindex = endindex; + strindex++; + + int inside = 1; + while (true) { +#ifdef POXML_DEBUG + qDebug("inside %s %d", message.mid(strindex, 35).latin1(), inside); +#endif + + // the exception for poxml_* attributes is made in the closing tag + int closing_index = message.find(QRegExp(QString::fromLatin1("</%1[\\s>]").arg(tag)), + strindex); + int starting_index = message.find(QRegExp(QString::fromLatin1("<%1[\\s>]").arg(tag)), + strindex); + +#ifdef POXML_DEBUG + qDebug("index1 %d %d %d", closing_index, starting_index, strindex); +#endif + + // when a new start was found, we set the start_index after the next match + // (and set strindex to it later - increasing inside) + if (starting_index != -1) { + starting_index += tag.length() + 1; + while (message.at(starting_index) != '>') + starting_index++; + starting_index++; + } + +#ifdef POXML_DEBUG + qDebug("index %d %d %d", closing_index, starting_index, strindex); +#endif + + assert(closing_index != -1); + closing_index += 3 + tag.length(); + while (message.at(closing_index - 1) != '>') + closing_index++; + + if (starting_index == -1) { + strindex = closing_index; +#ifdef POXML_DEBUG + qDebug("set strindex %d", strindex); +#endif + inside--; + if (!inside) + break; + continue; + } + if (closing_index < starting_index) + { + strindex = closing_index; + inside--; + } else { + strindex = starting_index; + inside++; + } + + if (!inside) + break; + } + +#ifdef 
POXML_DEBUG + qDebug("split into %s -AAAAAANNNNNNDDDDDD- %s", message.left(strindex).latin1(), message.mid(strindex).latin1()); +#endif + msg1.msgid = message.left(strindex); + bool leave = formatMessage(msg1); + + msg2.msgid = message.mid(strindex); + msg2.lines.first().offset += strindex; + leave = leave & formatMessage(msg2); + + if (msg1.lines.first().end_line > msg2.lines.first().start_line || + (msg1.lines.first().end_line == msg2.lines.first().start_line && + msg1.lines.first().end_col > msg2.lines.first().start_col)) + { + msg2.lines.first().start_line = msg1.lines.first().end_line; + msg2.lines.first().start_col = msg1.lines.first().end_col; + } + +#ifdef POXML_DEBUG + qDebug("splited %d-%d(%s) and %d-%d(%s)", msg1.lines.first().end_line,msg1.lines.first().end_col, + msg1.msgid.latin1(), + msg2.lines.first().start_line,msg2.lines.first().start_col, msg2.msgid.latin1()); +#endif + + if (leave) { + result.append(msg1); + result.append(msg2); + return result; + } + result = splitMessage(msg1); + result += splitMessage(msg2); + return result; + } + + } + + if (message.at(message.length() - 1 ) == '>') + { + int endindex = message.length() - 1; + while (endindex >= 0 && (message.at(endindex) != '<' || message.at(endindex + 1) != '/')) + endindex--; + QString tag = message.mid(endindex + 2, message.length() - endindex - 3); + if (tag.find(' ') > 0 ) { + tag = tag.left(tag.find(' ')); + } +#ifdef POXML_DEBUG + qDebug("behind tag %s", tag.latin1()); +#endif + + if (isCuttingTag(tag)) + { + // if the message ends with a cutting tag, this tag has to + // start in between. 
We split both messages and format them + int strindex = endindex; + + int inside = 1; + while (true) { +#ifdef POXML_DEBUG + qDebug("inside %s %d", message.mid(strindex, 35).latin1(), inside); +#endif + + int closing_index = message.findRev(QRegExp(QString::fromLatin1("</%1[\\s>]").arg(tag)), + strindex - 1); + int starting_index = message.findRev(QRegExp(QString::fromLatin1("<%1[\\s>]").arg(tag)), + strindex - 1); + +#ifdef POXML_DEBUG + qDebug("index1 %d %d %d", closing_index, starting_index, strindex); +#endif + + if (starting_index == -1) { + assert(inside == 1); + break; + } + + if (closing_index > starting_index) + { + strindex = closing_index; + inside++; + } else { + strindex = starting_index; + inside--; + } + + if (!inside) + break; + } + + +#ifdef POXML_DEBUG + qDebug("split2 into \"%s\" -AAAAAANNNNNNNNNDDDDDDDDDDD- \"%s\"", message.left(strindex).latin1(), message.mid(strindex).latin1()); +#endif + + msg1.msgid = message.left(strindex); + formatMessage(msg1); + + msg2.msgid = message.mid(strindex); + msg2.lines.first().offset += strindex; + formatMessage(msg2); + + if (msg1.lines.first().end_line > msg2.lines.first().start_line || + (msg1.lines.first().end_line == msg2.lines.first().start_line && + msg1.lines.first().end_col > msg2.lines.first().start_col)) + { + msg1.lines.first().end_line = msg2.lines.first().start_line; + msg1.lines.first().end_col = msg2.lines.first().start_col - 1; + } + +#ifdef POXML_DEBUG + qDebug("splited %d-%d(%s) and %d-%d(%s)", msg1.lines.first().end_line,msg1.lines.first().end_col, + msg1.msgid.latin1(), + msg2.lines.first().start_line,msg2.lines.first().start_col, msg2.msgid.latin1()); +#endif + + result = splitMessage(msg1); + result += splitMessage(msg2); + + return result; + } + } +error: + result.append(mb); + return result; +} + +bool StructureParser::endElement( const QString& , const QString&, const QString& qName) +{ + QString tname = qName.lower(); + + // qDebug("endElement %s - %s %d", tname.latin1(), 
message.latin1(), inside); + + if (inside) { + if (!isSingleTag(qName)) { + message += QString("</%1").arg(tname); + message += QString(" poxml_line=\"%1\"").arg(locator->lineNumber()); + message += QString(" poxml_col=\"%1\"").arg(locator->columnNumber()); + message += ">"; + } + } + + if (isCuttingTag(tname)) { + inside--; + if (!inside) { + MsgBlock m; + descape(message); + m.msgid = message; + + BlockInfo bi; + bi.start_line = startline; + bi.start_col = startcol; + bi.end_line = locator->lineNumber(); + bi.end_col = locator->columnNumber() + 1; + bi.offset = m.lines.first().offset; + m.lines.append(bi); + formatMessage(m); + + MsgList messages = splitMessage(m); + for (MsgList::Iterator it = messages.begin(); + it != messages.end(); ++it) + { +#ifdef POXML_DEBUG + qDebug("parser '%s' %d '%s' %d:%d", (*it).msgid.latin1(), (*it).lines.first().offset, message.mid((*it).lines.first().offset, 15).latin1(), (*it).lines.first().start_line, (*it).lines.first().start_col); +#endif + // if the remaining text still starts with a tag, the poxml_ info + // is most probably more correct + if ((*it).msgid.at(0) == '<' && isClosure((*it).msgid)) { + if (infos_reg.search((*it).msgid) >= 0) { + (*it).lines.first().start_line = infos_reg.cap(1).toInt(); + (*it).lines.first().start_col = infos_reg.cap(2).toInt();; + (*it).lines.first().offset = 0; + } + } + (*it).msgid.replace(infos_reg, QString::null); + + if (!(*it).msgid.isEmpty()) + list.append(*it); + } + } + } + + return true; +} + +bool StructureParser::comment ( const QString &c ) +{ + if (c.left(7) != " TRANS:") + return true; + + assert(false); + return true; +} + +QString StructureParser::escapeLiterals( const QString &_contents) { + QString contents = _contents; + + contents.replace(QRegExp("\n"), "&POXML_LINEFEED;"); + contents.replace(QRegExp("<"), "&POXML_LT;"); + contents.replace(QRegExp(">"), "&POXML_GT;"); + contents.replace(QRegExp("\t"), " "); + contents.replace(QRegExp(" "), "&POXML_SPACE;"); + + return 
contents; +} + +QString StructureParser::descapeLiterals( const QString &_contents) { + QString contents = _contents; + + contents.replace(QRegExp("&POXML_LINEFEED;"), "\n"); + contents.replace(QRegExp("&POXML_LT;"), "<"); + contents.replace(QRegExp("&POXML_GT;"), ">"); + contents.replace(QRegExp("&POXML_SPACE;"), " "); + contents.replace(QRegExp("!POXML_AMP!"), "&"); + return contents; +} + +void StructureParser::stripWhiteSpace( QString &contents) +{ + contents = contents.stripWhiteSpace(); + bool changed; + do { + changed = false; + if (contents.startsWith("&POXML_LINEFEED;")) { + contents = contents.mid(strlen("&POXML_LINEFEED;"), contents.length()); + changed = true; + } + if (contents.startsWith("&POXML_SPACE;")) { + contents = contents.mid(strlen("&POXML_SPACE;"), contents.length()); + changed = true; + } + if (contents.endsWith("&POXML_LINEFEED;")) { + contents = contents.left(contents.length() - strlen("&POXML_LINEFEED;")); + changed = true; + } + if (contents.endsWith("&POXML_SPACE;")) { + contents = contents.left( contents.length() - strlen("&POXML_SPACE;")); + changed = true; + } + } while (changed); +} + +void StructureParser::cleanupTags( QString &contents ) +{ + contents.replace(QRegExp("&"), "!POXML_AMP!"); + + for (int index = 0; literaltags[index]; index++) { + QRegExp start(QString("<%1[\\s>]").arg(literaltags[index])); + QRegExp end(QString("</%1[\\s>]").arg(literaltags[index])); + int strindex = 0; + while (true) { + strindex = contents.find(start, strindex); + if (strindex < 0) + break; + while (contents.at(strindex) != '>') + strindex++; + strindex++; // one more + int endindex = contents.find(end, strindex); + QString part = contents.mid(strindex, endindex - strindex); + QString newpart = escapeLiterals(part); + contents.replace(strindex, part.length(), newpart); + // this assumes that literal tags to not overlap + strindex = strindex + newpart.length(); + } + } + + QRegExp unclosed("</(\\w*)\\s\\s*>"); + int index = -1; + while (true) { + 
index = unclosed.search(contents, index + 1); + if (index < 0) + break; + QString tag = unclosed.cap(1); + contents.replace(index, unclosed.matchedLength(), QString("</%1>").arg(tag)); + } + + QRegExp start("<((\\s*[^<>\\s])*)\\s\\s*(/*)>"); + start.setMinimal(true); + + index = -1; + while (true) { + index = start.search(contents, index + 1); + if (index < 0) + break; + QString tag = start.cap(1); + QString cut = start.capturedTexts().last(); + // qDebug("UNCLO %s %d -%s- -%s-", start.cap(0).latin1(), index, tag.latin1(), cut.latin1()); + contents.replace(index, start.matchedLength(), QString("<%1%2>").arg(tag).arg(cut)); + } + QRegExp singletag("<(\\w*)\\s([^><]*)/>"); + + index = -1; + while (true) { + index = singletag.search(contents, index + 1); + if (index < 0) + break; + QString tag = singletag.cap(1); + if (!StructureParser::isSingleTag(tag)) { + contents.replace(index, singletag.matchedLength(), QString("<%1 %2></%3>").arg(tag).arg(singletag.cap(2)).arg(tag)); + } + } + + QRegExp trans_comment("<!-- TRANS:([^<>]*)-->"); + index = -1; + while (true) { + index = trans_comment.search(contents, index + 1); + if (index < 0) + break; + QString msgid = trans_comment.cap(1); + contents.replace(index, trans_comment.matchedLength(), QString("<trans_comment>%1</trans_comment>").arg(msgid)); + } + +#ifdef POXML_DEBUG + qDebug("final %s", contents.latin1()); +#endif + +} + +static bool removeEmptyTag( QString &contents, const QString & tag) +{ +// qDebug("cont %s %s", contents.latin1(), tag.latin1()); + + QRegExp empty(QString("<%1[^>]*>[\\s\n][\\s\n]*</%2\\s*>").arg(tag).arg(tag)); + int strindex = 0; + while (true) { + strindex = contents.find(empty, strindex); + if (strindex < 0) + break; + qDebug("found empty tag %s", tag.latin1()); + contents.replace(strindex, empty.matchedLength(), " "); + strindex++; + return true; + } + return false; +} + +void StructureParser::removeEmptyTags( QString &contents ) +{ + bool removed; + do { + removed = false; + + for (int index 
= 0; cuttingtags[index]; index++) { + if (removeEmptyTag(contents, cuttingtags[index])) { + removed = true; + break; + } + } + // as glossterm has two different semantics, it's likely + // to break something when it's cuttingtag + if (removeEmptyTag(contents, "glossterm")) + removed = true; + + } while (removed); +} + +bool StructureParser::characters(const QString &ch) +{ + if (inside && !ch.isEmpty()) + message += ch; + return true; +} + +QString escape(QString message) +{ + message.replace(QRegExp("\\\\"), "\\\\"); + message.replace(QRegExp("\""), "\\\""); + return message; +} + +void outputMsg(const char *prefix, const QString &message) +{ + QStringList list = QStringList::split('\n', message, true); + QString line; + + if (list.count() == 1) { + line = list.first(); + if (line.isEmpty()) + cout << prefix << " \"\"\n"; + else + cout << prefix << " \"" << escape(line).utf8().data() << "\"\n"; + } else { + cout << prefix << " \"\"\n"; + for (QStringList::ConstIterator it = list.begin(); it != list.end(); it++) { + line = *it; + if (!line.isEmpty()) { + cout << " \"" << escape(line).utf8().data(); + if (it == list.fromLast()) + cout << "\"\n"; + else + cout << "\\n\"\n"; + } else { + cout << " \""; + if (it != list.fromLast()) + cout << "\\n"; + cout << "\"\n"; + } + } + } +} + +QString escapePO(QString msgid) +{ + int index = 0; + while (true) { + index = msgid.find("\\n", index); + if (index == -1) + break; + if (index >= 1 && msgid.at(index - 1) == '\\' && msgid.at(index - 2) != '\\') { + msgid.replace(index - 1, 3, "&POXML_LITERALLINEFEED;"); + index += 3; + } else + msgid.replace(index, 2, "\n"); + } + index = 0; + while (true) { + index = msgid.find("\\\"", index); + if (index == -1) + break; + if (index > 1 && msgid.at(index - 1) == '\\' && msgid.at(index - 2) != '\\') + msgid.replace(index - 1, 3, "&POXML_LITERALQUOTE;"); + else + msgid.replace(index, 2, "\""); + } + index = 0; + while (true) { + index = msgid.find("\\t", index); + if (index == -1) + 
break; + if (msgid.at(index - 1) == '\\') + msgid.replace(index - 1, 3, "\\t"); + else + msgid.replace(index, 2, "\t"); + } + index = 0; + while (true) { + index = msgid.find("\\\\", index); + if (index == -1) + break; + msgid.replace(index, 2, "\\"); + index += 1; + } + + msgid.replace(QRegExp("&POXML_LITERALLINEFEED;"), "\\n"); + msgid.replace(QRegExp("&POXML_LITERALQUOTE;"), "\\"); + return msgid; +} + + +MsgList parseXML(const char *filename) +{ + StructureParser handler; + QFile xmlFile( filename ); + xmlFile.open(IO_ReadOnly); + + QCString ccontents; + ccontents.fill(0, xmlFile.size() + 1); + memcpy(ccontents.data(), xmlFile.readAll().data(), xmlFile.size()); + xmlFile.close(); + + QString contents = QString::fromUtf8( ccontents ); + StructureParser::cleanupTags(contents); + + while (true) { + int index = contents.find("<!ENTITY"); + if (index < 0) + break; + int inside = 0; + int endindex = index + 1; + QString replacement = ""; + while (contents.at(endindex) != '>' || inside) + { + switch (contents.at(endindex).latin1()) { + case '<': + inside++; break; + case '>': + inside--; break; + case '\n': + replacement += '\n'; + break; + default: + break; + } + endindex++; + } + endindex++; + contents.replace(index, endindex - index, replacement); + } + + QTextStream ts(contents.utf8(), IO_ReadOnly); + QXmlInputSource source( ts ); + QXmlSimpleReader reader; + reader.setFeature( "http://trolltech.com/xml/features/report-start-end-entity", true); + reader.setContentHandler( &handler ); + reader.setLexicalHandler( &handler ); + reader.setDTDHandler( &handler ); + // reader.setErrorHandler( &handler ); + reader.parse( source ); + MsgList english = handler.getList(); + + bool changed = false; + + do { + changed = false; + QMap<QString, QString> msgids; + + for (MsgList::Iterator it = english.begin(); + it != english.end(); it++) + { + QMap<QString,QString>::Iterator found = msgids.find((*it).msgid); + if ((*it).msgid.length() < 4) { + (*it).msgid = 
QString("<%1>").arg((*it).tag) + (*it).msgid + + QString("</%1>").arg((*it).tag); + changed = true; + break; + } + if (found != msgids.end()) { + if (found.data() != (*it).tag) { +#ifdef POXML_DEBUG + qDebug("same msgid for '%s' and '%s'", found.data().latin1(), (*it).tag.latin1()); +#endif + changed = true; + QString msgid = (*it).msgid; + for (MsgList::Iterator it2 = english.begin(); + it2 != english.end(); it2++) + { + if ((*it2).msgid == msgid) + (*it2).msgid = QString("<%1>").arg((*it2).tag) + msgid + QString("</%1>").arg((*it2).tag); + } + break; + } + } else { + msgids.insert((*it).msgid, (*it).tag); + } + } + } while (changed); + + return english; +} + diff --git a/poxml/parser.h b/poxml/parser.h new file mode 100644 index 00000000..f63f6cef --- /dev/null +++ b/poxml/parser.h @@ -0,0 +1,124 @@ +#ifndef PARSER_H +#define PARSER_H + +#include <qxml.h> +#include <qmap.h> +#include <qregexp.h> + +struct BlockInfo { + int start_line; + int start_col; + int end_line; + int end_col; + + // used to detect sub-messages + int offset; + + BlockInfo() { + start_line = 0; + start_col = 0; + end_line = 0; + end_col = 0; + + // used to detect sub-messages + offset = 0; + } +}; + +class MsgBlock { + public: + MsgBlock() { start = end = 0; do_not_split = false; } + MsgBlock(const MsgBlock &rhs ) { + *this = rhs; + } + QValueList<BlockInfo> lines; + QString tag; + QString comment; + QString msgid; + QString msgid_plural; + QString msgstr; + QStringList msgstr_plurals; + int start, end; + bool do_not_split; + + void operator=(const MsgBlock& rhs) { + lines = rhs.lines; + tag = rhs.tag; + comment = rhs.comment; + msgid = rhs.msgid; + msgid_plural = rhs.msgid_plural; + msgstr = rhs.msgstr; + msgstr_plurals = rhs.msgstr_plurals; + start = rhs.start; + end = rhs.end; + do_not_split = rhs.do_not_split; + } +}; + +class ParaCounter +{ +public: + ParaCounter() { current = 0; } + void addAnchor(QString anchor) { anchors.insert(anchor, current); } + void increasePara() { current++; } 
+ + QMap<QString, int> anchors; + int current; +}; + +class MsgList : public QValueList<MsgBlock> +{ +public: + MsgList() {} + ParaCounter pc; +}; + +class StructureParser : public QXmlDefaultHandler +{ +public: + bool startDocument(); + bool startElement( const QString&, const QString&, const QString& , + const QXmlAttributes& ); + bool endElement( const QString&, const QString&, const QString& ); + bool characters( const QString &ch); + static bool isCuttingTag(const QString &tag); + static bool isSingleTag(const QString &qName); + static bool isLiteralTag(const QString &qName); + void setDocumentLocator ( QXmlLocator * l ) { locator = l; } + bool skippedEntity ( const QString & name ); + bool fatalError ( const QXmlParseException & ); + bool comment ( const QString & ); + bool error(const QXmlParseException &e ) { return fatalError(e); } + bool warning(const QXmlParseException &e ) { return fatalError(e); } + MsgList getList() const { return list; } + MsgList splitMessage(const MsgBlock &message); + + virtual bool startCDATA(); + virtual bool endCDATA(); + + static bool closureTag(const QString& message, const QString &tag); + static bool isClosure(const QString &message); + static void descape(QString &message); + static QString escapeLiterals( const QString &contents); + static QString descapeLiterals( const QString &contents); + static void cleanupTags( QString &contents ); + static void removeEmptyTags( QString &contents); + static void stripWhiteSpace( QString &contents); + +private: + bool formatMessage(MsgBlock &message) const; + + QXmlLocator *locator; + QString message; + int inside, startline, startcol; + int line; + MsgList list; + mutable QRegExp infos_reg; + mutable QRegExp do_not_split_reg; +}; + +void outputMsg(const char *prefix, const QString &message); +MsgList parseXML(const char *filename); +QString escapePO(QString msgid); + +#endif diff --git a/poxml/po2xml.cpp b/poxml/po2xml.cpp new file mode 100644 index 00000000..9e8bc1a5 --- /dev/null 
+++ b/poxml/po2xml.cpp @@ -0,0 +1,261 @@ + // #define POXML_DEBUG + +#include "parser.h" +#include <stdlib.h> +#include <iostream> +#include <assert.h> +#include <qregexp.h> + +#include <fstream> +#include "GettextLexer.hpp" +#include "GettextParser.hpp" +#include "antlr/AST.hpp" +#include "antlr/CommonAST.hpp" + +using namespace std; + +QString translate(QString xml, QString orig, QString translation) +{ + QString prefix; + while (xml.at(0) == '<' && orig.at(0) != '<') { + // a XML tag as prefix + int index = xml.find('>'); + assert(index != -1); + index++; + while (xml.at(index) == ' ') + index++; + prefix = prefix + xml.left(index); + xml = xml.mid(index, xml.length()); + } + + int index = xml.find(orig); + if (index == -1) { + qWarning("can't find\n%s\nin\n%s", orig.latin1(), xml.latin1()); + exit(1); + } + if (!translation.isEmpty()) + xml.replace(index, orig.length(), translation); + return prefix + xml; +} + +int main( int argc, char **argv ) +{ + if (argc != 3) { + qWarning("usage: %s english-XML translated-PO", argv[0]); + ::exit(1); + } + + MsgList english = parseXML(argv[1]); + MsgList translated; + + try { + ifstream s(argv[2]); + GettextLexer lexer(s); + GettextParser parser(lexer); + translated = parser.file(); + + } catch(exception& e) { + cerr << "exception: " << e.what() << endl; + return 1; + } + + QMap<QString, QString> translations; + for (MsgList::ConstIterator it = translated.begin(); + it != translated.end(); ++it) + { + QString msgstr; + QString msgid = escapePO((*it).msgid); + if ((*it).comment.find("fuzzy") < 0) + msgstr = escapePO((*it).msgstr); + +#ifdef POXML_DEBUG + qDebug("inserting translations '%s' -> '%s'", msgid.latin1(),msgstr.latin1()); +#endif + translations.insert(msgid, msgstr); + } + + QFile xml(argv[1]); + xml.open(IO_ReadOnly); + QTextStream ds(&xml); + ds.setEncoding(QTextStream::UnicodeUTF8); + QString xml_text = ds.read(); + xml.close(); + QString output; + QTextStream ts(&output, IO_WriteOnly); + 
StructureParser::cleanupTags(xml_text); + + QValueList<int> line_offsets; + line_offsets.append(0); + int index = 0; + while (true) { + index = xml_text.find('\n', index) + 1; + if (index <= 0) + break; + line_offsets.append(index); + } + + int old_start_line = -1, old_start_col = -1; + QString old_text; + MsgList::Iterator old_it = english.end(); + + for (MsgList::Iterator it = english.begin(); + it != english.end(); ++it) + { + BlockInfo bi = (*it).lines.first(); + int start_pos = line_offsets[bi.start_line - 1] + bi.start_col; + if (!bi.end_line) + continue; + int end_pos = line_offsets[bi.end_line - 1] + bi.end_col - 1; + + (*it).start = start_pos; + if (old_start_line == bi.start_line && + old_start_col == bi.start_col) + { + (*old_it).end = bi.offset; + (*it).end = end_pos; + } else { + (*it).lines.first().offset = 0; + (*it).end = 0; + } + + old_start_line = bi.start_line; + old_start_col = bi.start_col; + old_it = it; + } + + int old_pos = 0; + + for (MsgList::Iterator it = english.begin(); + it != english.end(); ++it) + { + BlockInfo bi = (*it).lines.first(); + int start_pos = line_offsets[bi.start_line - 1] + bi.start_col; + if (!bi.end_line) + continue; + int end_pos = line_offsets[bi.end_line - 1] + bi.end_col - 1; + + QString xml = xml_text.mid(start_pos, end_pos - start_pos); + int index = 0; + while (true) { + index = xml.find("<!--"); + if (index == -1) + break; + int end_index = index + 4; + while (xml.at(end_index) != '>' || + xml.at(end_index-1) != '-' || + xml.at(end_index-2) != '-') + { + end_index++; + } + xml.replace(index, end_index + 1 - index, " "); + index = end_index; + } + StructureParser::descape(xml); + + QString descaped = StructureParser::descapeLiterals((*it).msgid); + if (translations.contains(descaped)) + descaped = translations[descaped]; + +#ifdef POXML_DEBUG + // assert(!descaped.isEmpty()); +#endif + + if ((*it).msgid.at(0) == '<' && StructureParser::isClosure((*it).msgid)) { + // if the id starts with a tag, then we 
remembered the + // correct line information and need to strip the target + // now, so it fits + int index = 0; + while ((*it).msgid.at(index) != '>') + index++; + index++; + while ((*it).msgid.at(index) == ' ') + index++; + QString omsgid = (*it).msgid; + (*it).msgid = (*it).msgid.mid(index); + + index = (*it).msgid.length() - 1; + while ((*it).msgid.at(index) != '<') + index--; + + (*it).msgid = (*it).msgid.left(index); + + if (!descaped.isEmpty()) { + if (descaped.at(0) != '<') { + qWarning("the translation of '%s' doesn't start with a tag.", omsgid.latin1()); + exit(1); + } + index = 0; + while (index <= (int)descaped.length() && descaped.at(index) != '>') + index++; + index++; + while (descaped.at(index) == ' ') + index++; + descaped = descaped.mid(index); + + index = descaped.length() - 1; + while (index >= 0 && descaped.at(index) != '<') + index--; + + descaped = descaped.left(index); + } + } + +#ifdef POXML_DEBUG + qDebug("english \"%s\" ORIG \"%s\" %d(%d-%d) %d(%d-%d) %d %d TRANS \"%s\" %d '%s'", xml.latin1(), (*it).msgid.latin1(), + start_pos, bi.start_line, bi.start_col, + end_pos, bi.end_line, bi.end_col, + (*it).lines.first().offset, + (*it).end, + translations[(*it).msgid].latin1(), (*it).end, + descaped.latin1() + ); +#endif + + if ((*it).end) { + if (!(*it).lines.first().offset && end_pos != old_pos) { + assert(start_pos >= old_pos); + ts << xml_text.mid(old_pos, start_pos - old_pos); + } + assert((*it).end >= bi.offset); + ts << translate(xml.mid(bi.offset, (*it).end - bi.offset), + (*it).msgid, descaped); + old_pos = end_pos; + } else { + if (start_pos != old_pos) { + if (start_pos < old_pos) { + qDebug("so far: '%s'", output.latin1()); + } + assert(start_pos > old_pos); + ts << xml_text.mid(old_pos, start_pos - old_pos); + } + old_pos = end_pos; + ts << translate(xml, + (*it).msgid, descaped); + } + } + + ts << xml_text.mid(old_pos); + + output.replace(QRegExp("<trans_comment\\s*>"), ""); + output.replace(QRegExp("</trans_comment\\s*>"), ""); + + 
StructureParser::removeEmptyTags(output); + + index = 0; + while (true) { + index = output.find(QRegExp(">[^\n]"), index ); + if ( index == -1 ) + break; + if ( output.at( index - 1 ) == '/' || output.at( index - 1 ) == '-' || + output.at( index - 1 ) == ']' || output.at( index - 1 ) == '?' ) + index = index + 1; + else { + output.replace( index, 1, "\n>" ); + index = index + 2; + } + } + output = StructureParser::descapeLiterals(output); + + cout << output.utf8().data(); + return 0; +} diff --git a/poxml/split.cpp b/poxml/split.cpp new file mode 100644 index 00000000..28149ed6 --- /dev/null +++ b/poxml/split.cpp @@ -0,0 +1,162 @@ +#include "parser.h" +#include <stdlib.h> +#include <iostream> + +using namespace std; + +int main( int argc, char **argv ) +{ + bool report_mismatches = qstrcmp(getenv("REPORT_MISMATCHES"), "no"); + + if (argc != 3) { + qWarning("usage: %s english-XML translated-XML", argv[0]); + exit(1); + } + + MsgList english = parseXML(argv[1]); + MsgList translated = parseXML(argv[2]); + + QMap<QString, int>::ConstIterator eit2 = english.pc.anchors.begin(); + + QMap<int, QString> errors; + + while (eit2 != english.pc.anchors.end()) + { + if (eit2.data() == translated.pc.anchors[eit2.key()]) { + QString key = eit2.key(); + eit2++; + translated.pc.anchors.remove(key); + english.pc.anchors.remove(key); + } else { + errors[eit2.data()] = eit2.key(); + eit2++; + } + } + + if (report_mismatches && errors.count()) { + for (QMap<int, QString>::ConstIterator it = errors.begin(); it != errors.end(); ++it) + { + if (translated.pc.anchors.contains(it.data())) + fprintf(stderr, "id=\"%s\" not in the same paragraphs (%d vs %d)\n", it.data().latin1(), + english.pc.anchors[it.data()], translated.pc.anchors[it.data()]); + else { + fprintf(stderr, "id=\"%s\" not in the translated paragraphs (it's in paragraph %d in english)\n", + it.data().latin1(), english.pc.anchors[it.data()]); + } + } + ::exit(1); + } + + MsgList::ConstIterator tit = translated.begin(); + for 
(MsgList::Iterator it = english.begin(); + it != english.end() && tit != translated.end(); + ++tit, ++it) + { + (*it).msgstr = (*tit).msgid; + } + + bool have_roles_of_translators = false; + bool have_credit_for_translators = false; + + QMap<QString, int> msgids; + int index = 0; + + for (MsgList::Iterator it = english.begin(); + it != english.end(); ) + { + if ((*it).msgid == "ROLES_OF_TRANSLATORS") { + if ((*it).msgstr.length() && !(*it).msgstr.contains("ROLES_OF_TRANSLATORS")) { + have_roles_of_translators = true; + } + else { + MsgList::Iterator tmp = it; + ++it; + english.remove(tmp); + } + continue; + } + + if ((*it).msgid == "CREDIT_FOR_TRANSLATORS") { + if ((*it).msgstr.length() && !(*it).msgstr.contains("CREDIT_FOR_TRANSLATORS")) { + have_credit_for_translators = true; + } + else { + MsgList::Iterator tmp = it; + ++it; + english.remove(tmp); + } + continue; + } + + if (msgids.contains((*it).msgid)) { + english[msgids[(*it).msgid]].lines += (*it).lines; + if (english[msgids[(*it).msgid]].msgstr != (*it).msgstr) { + fprintf(stderr, "two different translations for \"%s\" (\"%s\" and \"%s\") - choosing first one\n", + (*it).msgid.latin1(), + english[msgids[(*it).msgid]].msgstr.local8Bit().data(), + (*it).msgstr.local8Bit().data()); + + } + MsgList::Iterator tmp = it; + it++; + english.remove(tmp); + } else { + msgids.insert((*it).msgid, index); + index++; + it++; + } + } + + int counter = 1; + + while (tit != translated.end()) + { + MsgBlock mb; + mb.msgid = QString::fromLatin1("appended paragraph %1").arg(counter++); + mb.msgstr = (*tit).msgid; + mb.lines += (*tit).lines; + english.append(mb); + tit++; + } + + cout << "#, fuzzy\n"; + cout << "msgid \"\"\n"; + cout << "msgstr \"\"\n"; + cout << "\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\"\n"; + cout << "\"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n\"\n"; + cout << "\"Content-Type: text/plain; charset=utf-8\\n\"\n"; + + for (MsgList::ConstIterator it = english.begin(); + it != english.end(); ++it) + { + 
cout << "#: "; + for (QValueList<BlockInfo>::ConstIterator it2 = + (*it).lines.begin(); it2 != (*it).lines.end(); it2++) { + if (it2 != (*it).lines.begin()) + cout << ", "; + cout << "index.docbook:" << (*it2).start_line; + + } + cout << "\n"; + outputMsg("msgid", StructureParser::descapeLiterals( (*it).msgid )); + outputMsg("msgstr", StructureParser::descapeLiterals( (*it).msgstr )); + cout << "\n"; + } + + if ( !getenv( "NO_CREDITS" ) ) { + + if ( !have_roles_of_translators ) { + outputMsg("msgid", "ROLES_OF_TRANSLATORS"); + outputMsg("msgstr", "<!--TRANS:ROLES_OF_TRANSLATORS-->"); + cout << "\n"; + } + + if ( !have_credit_for_translators) { + outputMsg("msgid", "CREDIT_FOR_TRANSLATORS"); + outputMsg("msgstr", "<!--TRANS:CREDIT_FOR_TRANSLATORS-->"); + cout << "\n"; + } + } + + return 0; +} diff --git a/poxml/swappo.cpp b/poxml/swappo.cpp new file mode 100644 index 00000000..94c308ae --- /dev/null +++ b/poxml/swappo.cpp @@ -0,0 +1,38 @@ +#include <iostream> +using namespace std; +#include "GettextParser.hpp" +#include <fstream> +#include "GettextLexer.hpp" + +int main(int argc, char **argv) +{ + if ( argc != 2 ) { + qWarning( "usage: %s pofile", argv[0] ); + return -1; + } + + MsgList translated; + + try { + ifstream s(argv[1]); + GettextLexer lexer(s); + GettextParser parser(lexer); + translated = parser.file(); + + } catch(exception& e) { + cerr << "exception: " << e.what() << endl; + return 1; + } + + for (MsgList::ConstIterator it = translated.begin(); + it != translated.end(); ++it) + { + if ( !( *it ).msgstr.isEmpty() ) { + outputMsg("msgid", (*it).msgstr); + outputMsg("msgstr", (*it).msgid); + cout << "\n"; + } + } + +} + diff --git a/poxml/transxx.cpp b/poxml/transxx.cpp new file mode 100644 index 00000000..dc3dde00 --- /dev/null +++ b/poxml/transxx.cpp @@ -0,0 +1,130 @@ +#include <iostream> +using namespace std; +#include "GettextParser.hpp" +#include <fstream> +#include "GettextLexer.hpp" + +#include <qregexp.h> +#include <qdatetime.h> +#include 
<qfileinfo.h> + +int main(int argc, char **argv) +{ + if ( argc != 2 && argc != 4 ) { + qWarning( "usage: %s [--text translation] potfile", argv[0] ); + return -1; + } + + QString translation = "xx"; + QCString filename; + + if( argc == 4 ) { + if( argv[1]!=QString("--text") ) { + qWarning( "usage: %s [--text translation] potfile", argv[0] ); + return -1; + } + translation = QString::fromLocal8Bit(argv[2]); + filename = argv[3]; + } else { + filename = argv[1]; + } + + MsgList translated; + + try { + ifstream s(filename); + GettextLexer lexer(s); + GettextParser parser(lexer); + translated = parser.file(); + + } catch(exception& e) { + cerr << "exception: " << e.what() << endl; + return 1; + } + + const bool is_desktop = filename.find( "desktop_") >= 0; + + // The header is the last item (due too the sorting) + MsgList::const_iterator header = --translated.end(); + if ( ( header == translated.end() ) || ( ! ( *header ).msgid.isEmpty() ) ) + { + cerr << "Cannot find correct header msgid\n"; + cout << "\"Content-Type: text/plain; charset=utf-8\\n\"\n"; + cout << "\"Plural-Forms: nplurals=1; plural=0;\\n\"\n"; + } + else + { + QStringList headerLines = QStringList::split( "\\n", ( *header ).msgstr, false ); + QFileInfo fi( QString::fromLocal8Bit( filename ) ); + QString projectId( "Project-Id-Version: " ); + projectId += fi.baseName( false ); + headerLines.gres( QRegExp( "^Project-Id-Version:.*" ), projectId ); + headerLines.gres( QRegExp( "^Last-Translator:.*" ), "Last-Translator: transxx program <null@kde.org>" ); + headerLines.gres( QRegExp( "^Language-Team:.*" ), "Language-Team: Test Language <kde-i18n-doc@kde.org>" ); + QString revisionDate ( "PO-Revision-Date: " ); + const QDateTime dt = QDateTime::currentDateTime( Qt::UTC ); + revisionDate += dt.toString( "yyyy-MM-dd hh:mm+0000" ); + headerLines.gres( QRegExp( "^PO-Revision-Date:.*" ), revisionDate ); + headerLines << "Plural-Forms: nplurals=1; plural=0;"; + outputMsg ( "msgid", "" ); + outputMsg ( "msgstr", 
escapePO( headerLines.join("\\n") + "\\n" ) ); + } + cout << "\n"; + + for (MsgList::ConstIterator it = translated.begin(); + it != translated.end(); ++it) + { + QString msgid = ( *it ).msgid; + QString msgid_plural = ( *it ).msgid_plural; + if ( !msgid.isEmpty() ) { + outputMsg("msgid", escapePO( msgid) ); + + if ( ! msgid_plural.isEmpty() ) { + outputMsg("msgid_plural", escapePO( msgid_plural ) ); + } + + QString msgstr; + + if ( msgid.find( "Definition of PluralForm" ) != -1 ) { + outputMsg("msgstr", "NoPlural"); + cout << "\n"; + continue; + } + + if ( is_desktop ) { + msgstr = msgid.left( msgid.find( '=' ) + 1); + msgstr += translation + msgid.mid( msgid.find( '=' ) + 1) + translation; + outputMsg( "msgstr", escapePO(msgstr) ); + cout << "\n"; + continue; + } + + if (msgid.startsWith("_n: ") || msgid.startsWith("_: ") ) { // KDE extentions + msgid = msgid.mid(msgid.find("\\n") + 2, msgid.length()); + } + + if (msgid.endsWith("%")) + msgstr = translation + msgid + " " + translation; + else + msgstr = translation + msgid + translation; + + // Note: msgid has been modified, so we need to go back to the original version by the help of the iterator + // (Gettext is not aware of the KDE-specific handling, so it really wants a \n at start and at end in the msgstr if they were in the msgid ) + if ( ( *it ).msgid.endsWith( "\\n" ) && ! 
( *it ).msgid.endsWith( "\\\\n" )) + msgstr += "\n"; + if ( ( *it ).msgid.startsWith( "\\n" ) ) + msgstr.prepend( "\n" ); + + if ( msgid_plural.isEmpty() ) { + outputMsg("msgstr", escapePO( msgstr) ); + } + else + { + outputMsg("msgstr[0]", escapePO( msgstr) ); + } + cout << "\n"; + } + } + +} + diff --git a/poxml/xml2pot.cpp b/poxml/xml2pot.cpp new file mode 100644 index 00000000..593e75be --- /dev/null +++ b/poxml/xml2pot.cpp @@ -0,0 +1,77 @@ +#include "parser.h" +#include <stdlib.h> +#include <iostream> +#include <qfileinfo.h> +#include <qdatetime.h> + +using namespace std; + +int main( int argc, char **argv ) +{ + if (argc != 2) { + qWarning("usage: %s english-XML", argv[0]); + exit(1); + } + + MsgList english = parseXML(argv[1]); + + QMap<QString, int> msgids; + int index = 0; + + for (MsgList::Iterator it = english.begin(); + it != english.end(); ) + { + if (msgids.contains((*it).msgid)) { + english[msgids[(*it).msgid]].lines += (*it).lines; + MsgList::Iterator tmp = it; + it++; + english.remove(tmp); + } else { + msgids.insert((*it).msgid, index); + index++; + it++; + } + } + + const QDateTime now = QDateTime::currentDateTime( Qt::UTC ); + + cout << "# SOME DESCRIPTIVE TITLE.\n"; + cout << "# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n"; + cout << "#\n"; + cout << "#, fuzzy\n"; + cout << "msgid \"\"\n"; + cout << "msgstr \"\"\n"; + cout << "\"Project-Id-Version: PACKAGE VERSION\\n\"\n"; + cout << "\"Report-Msgid-Bugs-To: http://bugs.kde.org\\n\"\n"; + cout << "\"POT-Creation-Date: " << now.toString("yyyy-MM-dd hh:mm").utf8().data() << "+0000\\n\"\n"; + cout << "\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\"\n"; + cout << "\"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n\"\n"; + cout << "\"Language-Team: LANGUAGE <kde-i18n-doc@kde.org>\\n\"\n"; + cout << "\"MIME-Version: 1.0\\n\"\n"; + cout << "\"Content-Type: application/x-xml2pot; charset=UTF-8\\n\"\n"; + cout << "\"Content-Transfer-Encoding: 8bit\\n\"\n"; + cout << "\n"; + + const QString fname = 
QFileInfo(argv[1]).fileName(); + + for (MsgList::ConstIterator it = english.begin(); + it != english.end(); ++it) + { + cout << "#. Tag: " << (*it).tag.utf8() << endl; + cout << "#: "; + for (QValueList<BlockInfo>::ConstIterator it2 = + (*it).lines.begin(); it2 != (*it).lines.end(); it2++) { + if (it2 != (*it).lines.begin()) + cout << " "; + cout << fname.utf8().data() << ":" << (*it2).start_line; + + } + cout << "\n"; + cout << "#, no-c-format\n"; + outputMsg("msgid", StructureParser::descapeLiterals( (*it).msgid )); + outputMsg("msgstr", (*it).msgstr ); + cout << "\n"; + } + + return 0; +} |