diff options
Diffstat (limited to 'poxml/antlr/src')
30 files changed, 3503 insertions, 0 deletions
diff --git a/poxml/antlr/src/ANTLRException.cpp b/poxml/antlr/src/ANTLRException.cpp new file mode 100644 index 00000000..42632e71 --- /dev/null +++ b/poxml/antlr/src/ANTLRException.cpp @@ -0,0 +1,57 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/ANTLRException.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +ANTLRException::ANTLRException() : text("") +{} + +ANTLRException::ANTLRException(const ANTLR_USE_NAMESPACE(std)string& s) +: text(s) +{} + +ANTLRException::~ANTLRException() throw() +{} + +ANTLR_USE_NAMESPACE(std)string ANTLRException::toString() const +{ return text; } + +ANTLR_USE_NAMESPACE(std)string ANTLRException::getMessage() const +{ return text; } + +const char* ANTLRException::what() const throw() +{ return text.c_str(); } + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/ASTFactory.cpp b/poxml/antlr/src/ASTFactory.cpp new file mode 100644 index 00000000..e44386f7 --- /dev/null +++ b/poxml/antlr/src/ASTFactory.cpp @@ -0,0 +1,218 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. 
In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/ASTFactory.hpp" +#include "antlr/CommonAST.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** AST Support code shared by TreeParser and Parser. + * We use delegation to share code (and have only one + * bit of code to maintain) rather than subclassing + * or superclassing (forces AST support code to be + * loaded even when you don't want to do AST stuff). + * + * Typically, setASTNodeType is used to specify the + * type of node to create, but you can override + * create to make heterogeneous nodes etc... + */ + +ASTFactory::ASTFactory() : nodeFactory(&CommonAST::factory) +{ +} + +/** Add a child to the current AST */ +void ASTFactory::addASTChild(ASTPair& currentAST, RefAST child) +{ + if (child) { + if (!currentAST.root) { + // Make new child the current root + currentAST.root = child; + } + else { + if (!currentAST.child) { + // Add new child to current root + currentAST.root->setFirstChild(child); + } + else { + currentAST.child->setNextSibling(child); + } + } + // Make new child the current child + currentAST.child = child; + currentAST.advanceChildToEnd(); + } +} +/** Create a new empty AST node; if the user did not specify + * an AST node type, then create a default one: CommonAST. 
+ */ +RefAST ASTFactory::create() +{ + RefAST node = nodeFactory(); + node->setType(Token::INVALID_TYPE); + return node; +} + +RefAST ASTFactory::create(int type) +{ + RefAST t = nodeFactory(); + t->initialize(type,""); + return t; +} + +RefAST ASTFactory::create(int type, const ANTLR_USE_NAMESPACE(std)string& txt) +{ + RefAST t = nodeFactory(); + t->initialize(type,txt); + return t; +} + +/** Create a new empty AST node; if the user did not specify + * an AST node type, then create a default one: CommonAST. + */ +RefAST ASTFactory::create(RefAST tr) +{ + if (!tr) + return nullAST; + + RefAST t = nodeFactory(); + t->initialize(tr); + return t; +} + +RefAST ASTFactory::create(RefToken tok) +{ + RefAST t = nodeFactory(); + t->initialize(tok); + return t; +} +/** Copy a single node. clone() is not used because + * we want to return an AST not a plain object...a type + * safety issue. Further, we want to have all AST node + * creation go through the factory so creation can be + * tracked. Returns null if t is null. + */ +RefAST ASTFactory::dup(RefAST t) +{ + return create(t); // if t==null, create returns null +} + +/** Duplicate tree including siblings of root. */ +RefAST ASTFactory::dupList(RefAST t) +{ + RefAST result = dupTree(t); // if t == null, then result==null + RefAST nt = result; + while (t) { // for each sibling of the root + t = t->getNextSibling(); + nt->setNextSibling(dupTree(t)); // dup each subtree, building new tree + nt = nt->getNextSibling(); + } + return result; +} +/**Duplicate a tree, assuming this is a root node of a tree-- + * duplicate that node and what's below; ignore siblings of root node. + */ +RefAST ASTFactory::dupTree(RefAST t) +{ + RefAST result = dup(t); // make copy of root + // copy all children of root. + if (t) { + result->setFirstChild( dupList(t->getFirstChild()) ); + } + return result; +} +/** Make a tree from a list of nodes. The first element in the + * array is the root. 
If the root is null, then the tree is + * a simple list not a tree. Handles null children nodes correctly. + * For example, build(a, b, null, c) yields tree (a b c). build(null,a,b) + * yields tree (nil a b). + */ +RefAST ASTFactory::make(ANTLR_USE_NAMESPACE(std)vector<RefAST> nodes) +{ + if ( nodes.size()==0 ) + return RefAST(nullASTptr); + RefAST root = nodes[0]; + RefAST tail = RefAST(nullASTptr); + if (root) { + root->setFirstChild(RefAST(nullASTptr)); // don't leave any old pointers set + } + // link in children; + for (unsigned int i=1; i<nodes.size(); i++) { + if ( !nodes[i] ) continue; // ignore null nodes + if ( !root ) { + // Set the root and set it up for a flat list + root = tail = nodes[i]; + } + else if ( !tail ) { + root->setFirstChild(nodes[i]); + tail = root->getFirstChild(); + } + else { + tail->setNextSibling(nodes[i]); + tail = tail->getNextSibling(); + } + // Chase tail to last sibling + while (tail->getNextSibling()) { + tail = tail->getNextSibling(); + } + } + return root; +} +/** Make a tree from a list of nodes, where the nodes are contained + * in an ASTArray object + */ +RefAST ASTFactory::make(ASTArray* nodes) +{ + RefAST ret = make(nodes->array); + delete nodes; + return ret; +} +/** Make an AST the root of current AST */ +void ASTFactory::makeASTRoot(ASTPair& currentAST, RefAST root) +{ + if (root) { + // Add the current root as a child of new root + root->addChild(currentAST.root); + // The new current child is the last sibling of the old root + currentAST.child = currentAST.root; + currentAST.advanceChildToEnd(); + // Set the new root + currentAST.root = root; + } +} +void ASTFactory::setASTNodeFactory(factory_type factory) +{ + nodeFactory = factory; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/ASTRefCount.cpp b/poxml/antlr/src/ASTRefCount.cpp new file mode 100644 index 00000000..1da98306 --- /dev/null +++ b/poxml/antlr/src/ASTRefCount.cpp @@ -0,0 +1,74 @@ +#include "antlr/ASTRefCount.hpp" +#include "antlr/AST.hpp" + 
+ANTLR_BEGIN_NAMESPACE(antlr) + +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +ASTRef::ASTRef(AST* p) + : ptr(p), count(1) +{ + if (p && !p->ref) + p->ref = this; +} + +ASTRef::~ASTRef() +{ + delete ptr; +} + +ASTRef* ASTRef::increment() +{ + ++count; + return this; +} + +bool ASTRef::decrement() +{ + return (--count==0); +} + +ASTRef* ASTRef::getRef(const AST* p) +{ + if (p) { + AST* pp = const_cast<AST*>(p); + if (pp->ref) + return pp->ref->increment(); + else + return new ASTRef(pp); + } else + return 0; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/BaseAST.cpp b/poxml/antlr/src/BaseAST.cpp new file mode 100644 index 00000000..4080e0e8 --- /dev/null +++ b/poxml/antlr/src/BaseAST.cpp @@ -0,0 +1,320 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. 
As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/BaseAST.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +//bool BaseAST::verboseStringConversion; +//ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string> BaseAST::tokenNames; + +void BaseAST::addChild(RefAST c) +{ + if (!c) + return; + RefBaseAST tmp=down; + if (tmp) { + while (tmp->right) + tmp=tmp->right; + tmp->right=c; + } else { + down=c; + } +} + +void BaseAST::doWorkForFindAll( + ANTLR_USE_NAMESPACE(std)vector<RefAST>& v, + RefAST target,bool partialMatch) +{ + // Start walking sibling lists, looking for matches. + for (RefAST sibling=this; + sibling; + sibling=sibling->getNextSibling()) + { + if ( (partialMatch && sibling->equalsTreePartial(target)) || + (!partialMatch && sibling->equalsTree(target)) ) { + v.push_back(sibling); + } + // regardless of match or not, check any children for matches + if ( sibling->getFirstChild() ) { + RefBaseAST(sibling->getFirstChild())->doWorkForFindAll(v, target, partialMatch); + } + } + +} + +/** Is node t equal to this in terms of token type and text? */ +bool BaseAST::equals(RefAST t) const +{ + if (!t) + return false; + return (getText() == t->getText()) && (getType() == t->getType()); +} + +/** Is t an exact structural and equals() match of this tree. The + * 'this' reference is considered the start of a sibling list. + */ +bool BaseAST::equalsList(RefAST t) const +{ + // the empty tree is not a match of any non-null tree. + if (!t) + return false; + + // Otherwise, start walking sibling lists. First mismatch, return false. 
+ RefAST sibling=this; + for (;sibling && t; + sibling=sibling->getNextSibling(), t=t->getNextSibling()) { + // as a quick optimization, check roots first. + if (!sibling->equals(t)) + return false; + // if roots match, do full list match test on children. + if (sibling->getFirstChild()) { + if (!sibling->getFirstChild()->equalsList(t->getFirstChild())) + return false; + } + // sibling has no kids, make sure t doesn't either + else if (t->getFirstChild()) + return false; + } + + if (!sibling && !t) + return true; + + // one sibling list has more than the other + return false; +} + +/** Is 'sub' a subtree of this list? + * The siblings of the root are NOT ignored. + */ +bool BaseAST::equalsListPartial(RefAST sub) const +{ + // the empty tree is always a subset of any tree. + if (!sub) + return true; + + // Otherwise, start walking sibling lists. First mismatch, return false. + RefAST sibling=this; + for (;sibling && sub; + sibling=sibling->getNextSibling(), sub=sub->getNextSibling()) { + // as a quick optimization, check roots first. + if (!sibling->equals(sub)) + return false; + // if roots match, do partial list match test on children. + if (sibling->getFirstChild()) + if (!sibling->getFirstChild()->equalsListPartial(sub->getFirstChild())) + return false; + } + + if (!sibling && sub) + // nothing left to match in this tree, but subtree has more + return false; + + // either both are null or sibling has more, but subtree doesn't + return true; +} + +/** Is tree rooted at 'this' equal to 't'? The siblings + * of 'this' are ignored. + */ +bool BaseAST::equalsTree(RefAST t) const +{ + // check roots first + if (!equals(t)) + return false; + // if roots match, do full list match test on children. + if (getFirstChild()) { + if (!getFirstChild()->equalsList(t->getFirstChild())) + return false; + } + // sibling has no kids, make sure t doesn't either + else if (t->getFirstChild()) + return false; + + return true; +} + +/** Is 'sub' a subtree of the tree rooted at 'this'? 
The siblings + * of 'this' are ignored. + */ +bool BaseAST::equalsTreePartial(RefAST sub) const +{ + // the empty tree is always a subset of any tree. + if (!sub) + return true; + + // check roots first + if (!equals(sub)) + return false; + // if roots match, do full list partial match test on children. + if (getFirstChild()) + if (!getFirstChild()->equalsListPartial(sub->getFirstChild())) + return false; + + return true; +} + +/** Walk the tree looking for all exact subtree matches. Return + * an ASTEnumerator that lets the caller walk the list + * of subtree roots found herein. + */ +ANTLR_USE_NAMESPACE(std)vector<RefAST> BaseAST::findAll(RefAST target) +{ + ANTLR_USE_NAMESPACE(std)vector<RefAST> roots; + + // the empty tree cannot result in an enumeration + if (target) { + doWorkForFindAll(roots,target,false); // find all matches recursively + } + + return roots; +} + +/** Walk the tree looking for all subtrees. Return + * an ASTEnumerator that lets the caller walk the list + * of subtree roots found herein. 
+ */ +ANTLR_USE_NAMESPACE(std)vector<RefAST> BaseAST::findAllPartial(RefAST target) +{ + ANTLR_USE_NAMESPACE(std)vector<RefAST> roots; + + // the empty tree cannot result in an enumeration + if (target) { + doWorkForFindAll(roots,target,true); // find all matches recursively + } + + return roots; +} + +RefAST BaseAST::getFirstChild() const +{ + return RefAST(down); +} + +RefAST BaseAST::getNextSibling() const +{ + return RefAST(right); +} + +ANTLR_USE_NAMESPACE(std)string BaseAST::getText() const +{ + return ""; +} + +int BaseAST::getType() const +{ + return 0; +} + +void BaseAST::removeChildren() +{ + down=nullAST; +} + +void BaseAST::setFirstChild(RefAST c) +{ + down=c; +} + +void BaseAST::setNextSibling(RefAST n) +{ + right=n; +} + +void BaseAST::setText(const ANTLR_USE_NAMESPACE(std)string& txt) +{ +} + +void BaseAST::setType(int type) +{ +} + +//void BaseAST::setVerboseStringConversion(bool verbose, +// const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& names) +//{ +// verboseStringConversion = verbose; +// tokenNames = names; +//} + +ANTLR_USE_NAMESPACE(std)string BaseAST::toString() const +{ +// if ( verboseStringConversion && +// !getText().equalsIgnoreCase(tokenNames[getType()]) && +// !getText().equalsIgnoreCase(Tool.stripFrontBack(tokenNames[getType()],"\"","\"")) ) { +// b.append('['); +// b.append(getText()); +// b.append(",<"); +// b.append(tokenNames[getType()]); +// b.append(">]"); +// return b.toString(); +// } + return getText(); +} + +ANTLR_USE_NAMESPACE(std)string BaseAST::toStringList() const +{ + ANTLR_USE_NAMESPACE(std)string ts=""; + if (getFirstChild()) { + ts+=" ( "; + ts+=toString(); + ts+=getFirstChild()->toStringList(); + ts+=" )"; + } else { + ts+=" "; + ts+=toString(); + } + if (getNextSibling()) + ts+=getNextSibling()->toStringList(); + return ts; +} + +ANTLR_USE_NAMESPACE(std)string BaseAST::toStringTree() const +{ + ANTLR_USE_NAMESPACE(std)string ts=""; + if (getFirstChild()) { + ts+=" ( "; + ts+=toString(); + 
ts+=getFirstChild()->toStringList(); + ts+=" )"; + } else { + ts+=" "; + ts+=toString(); + } + return ts; +} + +// this is nasty, but it makes the code generation easier +RefAST nullAST; +AST* const nullASTptr=0; + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/BitSet.cpp b/poxml/antlr/src/BitSet.cpp new file mode 100644 index 00000000..a0a1b110 --- /dev/null +++ b/poxml/antlr/src/BitSet.cpp @@ -0,0 +1,76 @@ +#include "antlr/BitSet.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** A BitSet to replace java.util.BitSet. + * Primary differences are that most set operators return new sets + * as opposed to oring and anding "in place". Further, a number of + * operations were added. I cannot contain a BitSet because there + * is no way to access the internal bits (which I need for speed) + * and, because it is final, I cannot subclass to add functionality. + * Consider defining set degree. Without access to the bits, I must + * call a method n times to test the ith bit...ack! + * + * Also seems like or() from util is wrong when size of incoming set is bigger + * than this.length. + * + * This is a C++ version of the Java class described above, with only + * a handful of the methods implemented, because we don't need the + * others at runtime. It's really just a wrapper around vector<bool>, + * which should probably be changed to a wrapper around bitset, once + * bitset is more widely available. + * + * @author Terence Parr, MageLang Institute + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ +BitSet::BitSet(int nbits) + : storage(nbits) +{ + for (int i=0;i<nbits;i++) { + storage[i] = false; + } +} + +BitSet::BitSet(const unsigned long* bits_,int nlongs) + : storage(nlongs*32) +{ + for ( int i = 0 ; i < nlongs*32; i++) { + storage[i] = (bits_[i>>5] & (1UL << (i&31))) ? 
true : false; + } +} + +BitSet::~BitSet() +{ +} + +void BitSet::add(int el) +{ + if ( el < 0 ) + throw ANTLR_USE_NAMESPACE(std)out_of_range(ANTLR_USE_NAMESPACE(std)string("antlr::BitSet.cpp line 49")); + + if( static_cast<unsigned int>(el) >= storage.size() ) + storage.resize( el+1, false ); + + storage[el] = true; +} + +bool BitSet::member(int el) const +{ + if ( el < 0 || static_cast<unsigned int>(el) >= storage.size()) + return false; + + return storage[el]; +} + +ANTLR_USE_NAMESPACE(std)vector<int> BitSet::toArray() const +{ + ANTLR_USE_NAMESPACE(std)vector<int> elems; + for (unsigned int i=0;i<storage.size();i++) { + if (storage[i]) + elems.push_back(i); + } + + return elems; +} + +ANTLR_END_NAMESPACE diff --git a/poxml/antlr/src/CharBuffer.cpp b/poxml/antlr/src/CharBuffer.cpp new file mode 100644 index 00000000..a43eb153 --- /dev/null +++ b/poxml/antlr/src/CharBuffer.cpp @@ -0,0 +1,67 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. 
As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +/**A Stream of characters fed to the lexer from a InputStream that can + * be rewound via mark()/rewind() methods. + * <p> + * A dynamic array is used to buffer up all the input characters. Normally, + * "k" characters are stored in the buffer. More characters may be stored during + * guess mode (testing syntactic predicate), or when LT(i>k) is referenced. + * Consumption of characters is deferred. In other words, reading the next + * character is not done by conume(), but deferred until needed by LA or LT. + * <p> + * + * @see antlr.CharQueue + */ + +#include "antlr/CharBuffer.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** Create a character buffer */ +CharBuffer::CharBuffer(ANTLR_USE_NAMESPACE(std)istream& input_) +: input(input_) +{} + +/** Get the next character from the stream */ +int CharBuffer::getChar() +{ +// try { + return input.get(); +// } +// catch (???& e) { +// throw CharStreamIOException(e); +// } +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/CharScanner.cpp b/poxml/antlr/src/CharScanner.cpp new file mode 100644 index 00000000..ff40138d --- /dev/null +++ b/poxml/antlr/src/CharScanner.cpp @@ -0,0 +1,430 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. 
An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/CharScanner.hpp" +#include "antlr/CommonToken.hpp" +#include "antlr/MismatchedCharException.hpp" +#include <map> + +#ifdef HAS_NOT_CCTYPE_H +#include <ctype.h> +#else +#include <cctype> +#endif + +#include <iostream> + +#ifdef HAS_NOT_CSTRING_H +#include <string> +#else +#include <cstring> +#endif +#include <stdlib.h> + +ANTLR_BEGIN_NAMESPACE(antlr) +ANTLR_C_USING(exit) +ANTLR_C_USING(tolower) + +#ifdef ANTLR_REALLY_NO_STRCASECMP +// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior +// on the mac has neither... 
+inline int strcasecmp(const char *s1, const char *s2) +{ + while (true) + { + char c1 = tolower(*s1++), + c2 = tolower(*s2++); + if (c1 < c2) return -1; + if (c1 > c2) return 1; + if (c1 == 0) return 0; + } +} +#else +#ifdef NO_STRCASECMP +ANTLR_C_USING(stricmp) +#else +ANTLR_C_USING(strcasecmp) +#endif +#endif + +CharScannerLiteralsLess::CharScannerLiteralsLess(const CharScanner* theScanner) +: scanner(theScanner) +{} + +bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const +{ + if (scanner->getCaseSensitiveLiterals()) { + return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y); + } else { +#ifdef NO_STRCASECMP + return (stricmp(x.c_str(),y.c_str())<0); +#else + return (strcasecmp(x.c_str(),y.c_str())<0); +#endif + } +} + +CharScanner::CharScanner(InputBuffer& cb) + : saveConsumedInput(true) //, caseSensitiveLiterals(true) + , literals(CharScannerLiteralsLess(this)) + , inputState(new LexerInputState(cb)) + , commitToPath(false) + , traceDepth(0) +{ + setTokenObjectFactory(&CommonToken::factory); +} + +CharScanner::CharScanner(InputBuffer* cb) + : saveConsumedInput(true) //, caseSensitiveLiterals(true) + , literals(CharScannerLiteralsLess(this)) + , inputState(new LexerInputState(cb)) + , commitToPath(false) + , traceDepth(0) +{ + setTokenObjectFactory(&CommonToken::factory); +} + +CharScanner::CharScanner(const LexerSharedInputState& state) + : saveConsumedInput(true) //, caseSensitiveLiterals(true) + , literals(CharScannerLiteralsLess(this)) + , inputState(state) + , commitToPath(false) + , traceDepth(0) +{ + setTokenObjectFactory(&CommonToken::factory); +} + +CharScanner::~CharScanner() +{ +} + +void CharScanner::append(char c) +{ + if (saveConsumedInput) { + int l = text.length(); + if ((l%256) == 0) text.reserve(l+256); + text.replace(l,0,&c,1); + } +} + +void CharScanner::append(const ANTLR_USE_NAMESPACE(std)string& s) +{ + if (saveConsumedInput) + text+=s; +} + 
+void CharScanner::commit() +{ + inputState->getInput().commit(); +} + +void CharScanner::consume() +{ + if (inputState->guessing == 0) { + int c = LA(1); + if (caseSensitive) { + append(c); + } else { + // use input.LA(), not LA(), to get original case + // CharScanner.LA() would toLower it. + append(inputState->getInput().LA(1)); + } + if (c == '\t') { + tab(); + } + else { + inputState->column++; + } + } + inputState->getInput().consume(); +} + +/** Consume chars until one matches the given char */ +void CharScanner::consumeUntil(int c) +{ + while (LA(1) != EOF_CHAR && LA(1) != c) + { + consume(); + } +} + +/** Consume chars until one matches the given set */ +void CharScanner::consumeUntil(const BitSet& set) +{ + while (LA(1) != EOF_CHAR && !set.member(LA(1))) { + consume(); + } +} + +bool CharScanner::getCaseSensitive() const +{ return caseSensitive; } + +//bool CharScanner::getCaseSensitiveLiterals() const +//{ return caseSensitiveLiterals; } + +int CharScanner::getColumn() const +{ return inputState->column; } + +void CharScanner::setColumn(int c) +{ inputState->column = c; } + +bool CharScanner::getCommitToPath() const +{ return commitToPath; } + +const ANTLR_USE_NAMESPACE(std)string& CharScanner::getFilename() const +{ return inputState->filename; } + +InputBuffer& CharScanner::getInputBuffer() +{ return inputState->getInput(); } + +LexerSharedInputState CharScanner::getInputState() +{ return inputState; } + +int CharScanner::getLine() const +{ return inputState->line; } + +/** return a copy of the current text buffer */ +const ANTLR_USE_NAMESPACE(std)string& CharScanner::getText() const +{ return text; } + +RefToken CharScanner::getTokenObject() const +{ return _returnToken; } + +RefToken CharScanner::makeToken(int t) +{ + RefToken tok=tokenFactory(); + tok->setType(t); + tok->setColumn(inputState->tokenStartColumn); + tok->setLine(inputState->tokenStartLine); + return tok; +} + +int CharScanner::mark() +{ + return inputState->getInput().mark(); +} + 
+void CharScanner::match(int c) +{ + if ( LA(1) != c ) { + throw MismatchedCharException(LA(1),c,false,this); + } + consume(); +} + +void CharScanner::match(const BitSet& b) +{ + if (!b.member(LA(1))) { + throw MismatchedCharException(LA(1),b,false,this); + } + consume(); +} + +void CharScanner::match(const ANTLR_USE_NAMESPACE(std)string& s) +{ + int len = s.length(); + for (int i=0; i<len; i++) { + if ( LA(1) != s[i] ) { + throw MismatchedCharException(LA(1),s[i],false,this); + } + consume(); + } +} + +void CharScanner::matchNot(int c) +{ + if ( LA(1) == c ) { + throw MismatchedCharException(LA(1),c,true,this); + } + consume(); +} + +void CharScanner::matchRange(int c1, int c2) +{ + if (LA(1)<c1 || LA(1)>c2) { + throw MismatchedCharException(LA(1),c1,c2,false,this); + } + consume(); +} + +void CharScanner::newline() +{ + ++inputState->line; + inputState->column=1; +} + +/** advance the current column number by an appropriate amount. + * If you do not override this to specify how much to jump for + * a tab, then tabs are counted as one char. This method is + * called from consume(). + */ +void CharScanner::tab() { + // update inputState->column as function of + // inputState->column and tab stops. + // For example, if tab stops are columns 1 and 5 etc... + // and column is 3, then add 2 to column. 
+ ++inputState->column; +} + +void CharScanner::panic() +{ + ANTLR_USE_NAMESPACE(std)cerr << "CharScanner: panic" << ANTLR_USE_NAMESPACE(std)endl; + exit(1); +} + +void CharScanner::panic(const ANTLR_USE_NAMESPACE(std)string& s) +{ + ANTLR_USE_NAMESPACE(std)cerr << "CharScanner: panic: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; + exit(1); +} + +/** Report exception errors caught in nextToken() */ +void CharScanner::reportError(const RecognitionException& ex) +{ + ANTLR_USE_NAMESPACE(std)cerr << ex.toString().c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +/** Parser error-reporting function can be overridden in subclass */ +void CharScanner::reportError(const ANTLR_USE_NAMESPACE(std)string& s) +{ + if (getFilename().empty()) + ANTLR_USE_NAMESPACE(std)cerr << "error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; + else + ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +/** Parser warning-reporting function can be overridden in subclass */ +void CharScanner::reportWarning(const ANTLR_USE_NAMESPACE(std)string& s) +{ + if (getFilename().empty()) + ANTLR_USE_NAMESPACE(std)cerr << "warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; + else + ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +void CharScanner::resetText() +{ + text=""; + inputState->tokenStartColumn = inputState->column; + inputState->tokenStartLine = inputState->line; +} + +void CharScanner::rewind(int pos) +{ + inputState->getInput().rewind(pos); +} + +void CharScanner::setCaseSensitive(bool t) +{ + caseSensitive = t; +} + +void CharScanner::setCommitToPath(bool commit) +{ + commitToPath = commit; +} + +void CharScanner::setFilename(const ANTLR_USE_NAMESPACE(std)string& f) +{ inputState->filename=f; } + +void CharScanner::setInputState(LexerSharedInputState state) +{ inputState = state; } + +void CharScanner::setLine(int l) +{ inputState->line=l; } + 
+/** Replace the text accumulated for the current token with s. */
+void CharScanner::setText(const ANTLR_USE_NAMESPACE(std)string& s)
+{
+    text = s;
+}
+
+/** Store the factory used to create token objects. */
+void CharScanner::setTokenObjectFactory(factory_type factory)
+{
+    tokenFactory = factory;
+}
+
+/** Test the token text against the literals table.
+ *  Override this method to perform a different literals test.
+ *  @param ttype type to return when the current text is not in the table
+ *  @return the type stored in the table for the current text, else ttype
+ */
+int CharScanner::testLiteralsTable(int ttype) const
+{
+    typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> LiteralsMap;
+    const LiteralsMap::const_iterator hit = literals.find(text);
+    return (hit == literals.end()) ? ttype : hit->second;
+}
+
+/** Test the text passed in against the literals table.
+ *  Override this method to perform a different literals test.
+ *  This is used primarily when you want to test a portion of a token.
+ *  @param text_ text to look up
+ *  @param ttype type to return when text_ is not in the table
+ *  @return the type stored in the table for text_, else ttype
+ */
+int CharScanner::testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& text_, int ttype) const
+{
+    typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> LiteralsMap;
+    const LiteralsMap::const_iterator hit = literals.find(text_);
+    return (hit == literals.end()) ? ttype : hit->second;
+}
+
+/** Override this method to get more specific case handling.
+ *  The default simply forwards to the C library tolower().
+ */
+int CharScanner::toLower(int c) const
+{
+    const int lowered = tolower(c);
+    return lowered;
+}
+
+/** Print one space on standard output per level of traceDepth. */
+void CharScanner::traceIndent()
+{
+    int level = 0;
+    while (level < traceDepth) {
+        ANTLR_USE_NAMESPACE(std)cout << " ";
+        ++level;
+    }
+}
+
+/** Trace entry into lexer rule rname: deepen the trace by one level, then
+ *  print the indented rule name together with LA(1).
+ */
+void CharScanner::traceIn(const ANTLR_USE_NAMESPACE(std)string& rname)
+{
+    ++traceDepth;
+    traceIndent();
+    ANTLR_USE_NAMESPACE(std)cout << "> lexer " << rname.c_str()
+        << "; c==" << LA(1) << ANTLR_USE_NAMESPACE(std)endl;
+}
+
+/** Trace exit from lexer rule rname: print the indented rule name together
+ *  with LA(1), then reduce the trace depth by one level.
+ */
+void CharScanner::traceOut(const ANTLR_USE_NAMESPACE(std)string& rname)
+{
+    traceIndent();
+    ANTLR_USE_NAMESPACE(std)cout << "< lexer " << rname.c_str()
+        << "; c==" << LA(1) << ANTLR_USE_NAMESPACE(std)endl;
+    --traceDepth;
+}
+
+/** End-of-input hook; the default implementation does nothing. */
+void CharScanner::uponEOF()
+{
+}
+
+// Out-of-class definitions for the static const members, skipped when
+// NO_STATIC_CONSTS is defined.
+#ifndef NO_STATIC_CONSTS
+const int CharScanner::NO_CHAR;
+const int CharScanner::EOF_CHAR;
+#endif
+
+ANTLR_END_NAMESPACE
+
diff --git a/poxml/antlr/src/CommonAST.cpp b/poxml/antlr/src/CommonAST.cpp
new file mode 100644 index 00000000..3a4067e3 --- /dev/null +++ b/poxml/antlr/src/CommonAST.cpp @@ -0,0 +1,100 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/config.hpp" +#include "antlr/CommonAST.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +CommonAST::CommonAST() +: BaseAST(), + ttype( Token::INVALID_TYPE ), + text("") +{ +} + +CommonAST::CommonAST(RefToken t) +: BaseAST(), + ttype( t->getType() ), + text( t->getText() ) +{ +} + +CommonAST::~CommonAST() +{ +} + +ANTLR_USE_NAMESPACE(std)string CommonAST::getText() const +{ + return text; +} + +int CommonAST::getType() const +{ + return ttype; +} + +void CommonAST::initialize(int t,const ANTLR_USE_NAMESPACE(std)string& txt) +{ + setType(t); + setText(txt); +} + +void CommonAST::initialize(RefAST t) +{ + setType(t->getType()); + setText(t->getText()); +} + +void CommonAST::initialize(RefToken t) +{ + setType(t->getType()); + setText(t->getText()); +} + +void CommonAST::setText(const ANTLR_USE_NAMESPACE(std)string& txt) +{ + text = txt; +} + +void CommonAST::setType(int type) +{ + ttype = type; +} + +RefAST CommonAST::factory() +{ + return RefAST(new CommonAST); +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/CommonASTWithHiddenTokens.cpp b/poxml/antlr/src/CommonASTWithHiddenTokens.cpp new file mode 100644 index 00000000..d6c242d2 --- /dev/null +++ b/poxml/antlr/src/CommonASTWithHiddenTokens.cpp @@ -0,0 +1,29 @@ +#include "antlr/config.hpp" +#include "antlr/CommonASTWithHiddenTokens.hpp" +#include "antlr/CommonHiddenStreamToken.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +void CommonASTWithHiddenTokens::initialize(int t,const ANTLR_USE_NAMESPACE(std)string& txt) +{ + CommonAST::initialize(t,txt); +} + +void CommonASTWithHiddenTokens::initialize(RefAST t) +{ + CommonAST::initialize(t); +} + +void CommonASTWithHiddenTokens::initialize(RefToken t) 
+{ + CommonAST::initialize(t); + hiddenBefore = static_cast<CommonHiddenStreamToken*>(t.get())->getHiddenBefore(); + hiddenAfter = static_cast<CommonHiddenStreamToken*>(t.get())->getHiddenAfter(); +} + +RefAST CommonASTWithHiddenTokens::factory() +{ + return RefAST(new CommonASTWithHiddenTokens); +} + +ANTLR_END_NAMESPACE diff --git a/poxml/antlr/src/CommonHiddenStreamToken.cpp b/poxml/antlr/src/CommonHiddenStreamToken.cpp new file mode 100644 index 00000000..d33927cc --- /dev/null +++ b/poxml/antlr/src/CommonHiddenStreamToken.cpp @@ -0,0 +1,46 @@ +#include "antlr/CommonHiddenStreamToken.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +CommonHiddenStreamToken::CommonHiddenStreamToken() +: CommonToken() +{ +} + +CommonHiddenStreamToken::CommonHiddenStreamToken(int t, const ANTLR_USE_NAMESPACE(std)string& txt) +: CommonToken(t,txt) +{ +} + +CommonHiddenStreamToken::CommonHiddenStreamToken(const ANTLR_USE_NAMESPACE(std)string& s) +: CommonToken(s) +{ +} + +RefToken CommonHiddenStreamToken::getHiddenAfter() +{ + return hiddenAfter; +} + +RefToken CommonHiddenStreamToken::getHiddenBefore() +{ + return hiddenBefore; +} + +RefToken CommonHiddenStreamToken::factory() +{ + return RefToken(new CommonHiddenStreamToken); +} + +void CommonHiddenStreamToken::setHiddenAfter(RefToken t) +{ + hiddenAfter = t; +} + +void CommonHiddenStreamToken::setHiddenBefore(RefToken t) +{ + hiddenBefore = t; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/CommonToken.cpp b/poxml/antlr/src/CommonToken.cpp new file mode 100644 index 00000000..ff60bd79 --- /dev/null +++ b/poxml/antlr/src/CommonToken.cpp @@ -0,0 +1,81 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. 
+ * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/CommonToken.hpp" +#include "antlr/String.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +CommonToken::CommonToken() : Token(), line(1), col(1), text("") +{} + +CommonToken::CommonToken(int t, const ANTLR_USE_NAMESPACE(std)string& txt) + : Token(t), line(1), col(1), text(txt) +{} + +CommonToken::CommonToken(const ANTLR_USE_NAMESPACE(std)string& s) + : Token(), line(1), col(1), text(s) +{} + +int CommonToken::getLine() const +{ return line; } + +ANTLR_USE_NAMESPACE(std)string CommonToken::getText() const +{ return text; } + +void CommonToken::setLine(int l) +{ line=l; } + +void CommonToken::setText(const ANTLR_USE_NAMESPACE(std)string& s) +{ text=s; } + +ANTLR_USE_NAMESPACE(std)string CommonToken::toString() const +{ + return "[\""+getText()+"\",<"+type+">,line="+line+"]"; +} + +int CommonToken::getColumn() const +{ return col; } + +void CommonToken::setColumn(int c) +{ col=c; } + +bool 
CommonToken::isInvalid() const +{ return type==INVALID_TYPE; } + +RefToken CommonToken::factory() +{ + return RefToken(new CommonToken); +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/InputBuffer.cpp b/poxml/antlr/src/InputBuffer.cpp new file mode 100644 index 00000000..058c32ab --- /dev/null +++ b/poxml/antlr/src/InputBuffer.cpp @@ -0,0 +1,109 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +/**A Stream of characters fed to the lexer from a InputStream that can + * be rewound via mark()/rewind() methods. 
+ * <p> + * A dynamic array is used to buffer up all the input characters. Normally, + * "k" characters are stored in the buffer. More characters may be stored during + * guess mode (testing syntactic predicate), or when LT(i>k) is referenced. + * Consumption of characters is deferred. In other words, reading the next + * character is not done by conume(), but deferred until needed by LA or LT. + * <p> + * + * @see antlr.CharQueue + */ + +#include "antlr/InputBuffer.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** Create a character buffer */ +InputBuffer::InputBuffer() +: nMarkers(0), markerOffset(0), numToConsume(0) +{} + +/** This method updates the state of the input buffer so that + * the text matched since the most recent mark() is no longer + * held by the buffer. So, you either do a mark/rewind for + * failed predicate or mark/commit to keep on parsing without + * rewinding the input. + */ +void InputBuffer::commit() +{ + nMarkers--; +} + +/** Mark another character for deferred consumption */ +void InputBuffer::consume() +{ + numToConsume++; +} + +/** Ensure that the character buffer is sufficiently full */ +void InputBuffer::fill(int amount) +{ + syncConsume(); + // Fill the buffer sufficiently to hold needed characters + while (queue.entries() < amount + markerOffset) { + // Append the next character + queue.append(getChar()); + } +} + +bool InputBuffer::isMarked() const +{ + return (nMarkers != 0); +} + +/**Return an integer marker that can be used to rewind the buffer to + * its current state. + */ +int InputBuffer::mark() +{ + syncConsume(); + nMarkers++; + return markerOffset; +} + +/**Rewind the character buffer to a marker. 
+ * @param mark Marker returned previously from mark()
+ */
+void InputBuffer::rewind(int mark)
+{
+    // presumably flushes deferred consume() calls before the offset is reset - confirm in syncConsume()
+    syncConsume();
+    // One marker fewer is outstanding; the read offset goes back to the marked position.
+    --nMarkers;
+    markerOffset = mark;
+}
+
+ANTLR_END_NAMESPACE
+
diff --git a/poxml/antlr/src/LLkParser.cpp b/poxml/antlr/src/LLkParser.cpp new file mode 100644 index 00000000..2f21cd8b --- /dev/null +++ b/poxml/antlr/src/LLkParser.cpp @@ -0,0 +1,105 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/LLkParser.hpp" +#include <iostream> + +ANTLR_BEGIN_NAMESPACE(antlr) + +/**An LL(k) parser.
+ * + * @see antlr.Token + * @see antlr.TokenBuffer + * @see antlr.LL1Parser + */ + +// LLkParser(int k_); + +LLkParser::LLkParser(const ParserSharedInputState& state, int k_) +: Parser(state), k(k_) +{} + +LLkParser::LLkParser(TokenBuffer& tokenBuf, int k_) +: Parser(tokenBuf), k(k_) +{} + +LLkParser::LLkParser(TokenStream& lexer, int k_) +: Parser(new TokenBuffer(lexer)), k(k_) +{ +} + +/**Consume another token from the input stream. Can only write sequentially! + * If you need 3 tokens ahead, you must consume() 3 times. + * <p> + * Note that it is possible to overwrite tokens that have not been matched. + * For example, calling consume() 3 times when k=2, means that the first token + * consumed will be overwritten with the 3rd. + */ +void LLkParser::consume() +{ inputState->getInput().consume(); } + +int LLkParser::LA(int i) +{ return inputState->getInput().LA(i); } + +RefToken LLkParser::LT(int i) +{ return inputState->getInput().LT(i); } + +void LLkParser::trace(const ANTLR_USE_NAMESPACE(std)string& ee, const ANTLR_USE_NAMESPACE(std)string& rname) +{ + traceIndent(); + + ANTLR_USE_NAMESPACE(std)cout << ee.c_str() << rname.c_str() << ((inputState->guessing>0)?"; [guessing]":"; "); + + for (int i = 1; i <= k; i++) + { + if (i != 1) { + ANTLR_USE_NAMESPACE(std)cout << ", "; + } + ANTLR_USE_NAMESPACE(std)cout << "LA(" << i << ")==" << LT(i)->getText().c_str(); + } + + ANTLR_USE_NAMESPACE(std)cout << ANTLR_USE_NAMESPACE(std)endl; +} + +void LLkParser::traceIn(const ANTLR_USE_NAMESPACE(std)string& rname) +{ + traceDepth++; + trace("> ",rname); +} + +void LLkParser::traceOut(const ANTLR_USE_NAMESPACE(std)string& rname) +{ + trace("< ",rname); + traceDepth--; +} + +ANTLR_END_NAMESPACE diff --git a/poxml/antlr/src/LexerSharedInputState.cpp b/poxml/antlr/src/LexerSharedInputState.cpp new file mode 100644 index 00000000..a95f33a8 --- /dev/null +++ b/poxml/antlr/src/LexerSharedInputState.cpp @@ -0,0 +1,55 @@ +#include "antlr/LexerSharedInputState.hpp" +#include 
"antlr/CharBuffer.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** This object contains the data associated with an + * input stream of characters. Multiple lexers + * share a single LexerSharedInputState to lex + * the same input stream. + */ + +LexerInputState::LexerInputState(InputBuffer* inbuf) +: column(1) +, line(1) +, tokenStartColumn(1) +, tokenStartLine(1) +, guessing(0) +, filename("") +, input(inbuf) +, inputResponsible(true) +{ +} + +LexerInputState::LexerInputState(InputBuffer& inbuf) +: column(1) +, line(1) +, tokenStartColumn(1) +, tokenStartLine(1) +, guessing(0) +, filename("") +, input(&inbuf) +, inputResponsible(false) +{ +} + +LexerInputState::LexerInputState(ANTLR_USE_NAMESPACE(std)istream& in) +: column(1) +, line(1) +, tokenStartColumn(1) +, tokenStartLine(1) +, guessing(0) +, filename("") +, input(new CharBuffer(in)) +, inputResponsible(true) +{ +} + +LexerInputState::~LexerInputState() +{ + if (inputResponsible) + delete input; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/Makefile.am b/poxml/antlr/src/Makefile.am new file mode 100644 index 00000000..7a5d2426 --- /dev/null +++ b/poxml/antlr/src/Makefile.am @@ -0,0 +1,39 @@ + +# Make #include <antlr/xxx> work.. +INCLUDES=-I$(srcdir)/.. 
+KDE_CXXFLAGS = $(USE_EXCEPTIONS) + +noinst_LTLIBRARIES = libantlr.la + +libantlr_la_LDFLAGS = -no-undefined + +libantlr_la_SOURCES = \ + ANTLRException.cpp \ + ASTFactory.cpp \ + ASTRefCount.cpp \ + BaseAST.cpp \ + BitSet.cpp \ + CharBuffer.cpp \ + CharScanner.cpp \ + CommonAST.cpp \ + CommonASTWithHiddenTokens.cpp \ + CommonHiddenStreamToken.cpp \ + CommonToken.cpp \ + InputBuffer.cpp \ + LLkParser.cpp \ + LexerSharedInputState.cpp \ + MismatchedCharException.cpp \ + MismatchedTokenException.cpp \ + NoViableAltException.cpp \ + NoViableAltForCharException.cpp \ + Parser.cpp \ + ParserSharedInputState.cpp \ + RecognitionException.cpp \ + String.cpp \ + Token.cpp \ + TokenBuffer.cpp \ + TokenStreamBasicFilter.cpp \ + TokenStreamHiddenTokenFilter.cpp \ + TokenStreamSelector.cpp \ + TreeParser.cpp \ + TreeParserSharedInputState.cpp diff --git a/poxml/antlr/src/MismatchedCharException.cpp b/poxml/antlr/src/MismatchedCharException.cpp new file mode 100644 index 00000000..4dede0e8 --- /dev/null +++ b/poxml/antlr/src/MismatchedCharException.cpp @@ -0,0 +1,153 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1999 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. 
In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1999 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ +
+#include "antlr/MismatchedCharException.hpp"
+#include "antlr/String.hpp"
+
+ANTLR_BEGIN_NAMESPACE(antlr)
+
+/** Exception carrying only the generic "Mismatched char" text.
+ *  NOTE(review): mismatchType is not set by this constructor, yet
+ *  getMessage() switches on it - confirm the header supplies a default.
+ */
+MismatchedCharException::MismatchedCharException()
+: RecognitionException("Mismatched char")
+{}
+
+/** Expected range / not range.
+ *  @param c         character that was found
+ *  @param lower     first character of the expected range
+ *  @param upper_    last character of the expected range
+ *  @param matchNot  true when the range was expected NOT to match
+ *  @param scanner_  scanner supplying file name, line and column
+ */
+MismatchedCharException::MismatchedCharException(
+    int c,
+    int lower,
+    int upper_,
+    bool matchNot,
+    CharScanner* scanner_
+) : RecognitionException("Mismatched char",
+                         scanner_->getFilename(),
+                         scanner_->getLine(),
+                         scanner_->getColumn())
+  , mismatchType(matchNot ? NOT_RANGE : RANGE)
+  , foundChar(c)
+  , expecting(lower)
+  , upper(upper_)
+  , scanner(scanner_)
+{
+}
+
+/** Expected char / not char.
+ *  @param c           character that was found
+ *  @param expecting_  character that was expected
+ *  @param matchNot    true when the character was expected NOT to match
+ *  @param scanner_    scanner supplying file name, line and column
+ */
+MismatchedCharException::MismatchedCharException(
+    int c,
+    int expecting_,
+    bool matchNot,
+    CharScanner* scanner_
+) : RecognitionException("Mismatched char",
+                         scanner_->getFilename(),
+                         scanner_->getLine(),
+                         scanner_->getColumn())
+  , mismatchType(matchNot ? NOT_CHAR : CHAR)
+  , foundChar(c)
+  , expecting(expecting_)
+  , scanner(scanner_)
+{
+}
+
+/** Expected BitSet / not BitSet.
+ *  @param c         character that was found
+ *  @param set_      set of expected characters
+ *  @param matchNot  true when the set was expected NOT to match
+ *  @param scanner_  scanner supplying file name, line and column
+ */
+MismatchedCharException::MismatchedCharException(
+    int c,
+    BitSet set_,
+    bool matchNot,
+    CharScanner* scanner_
+) : RecognitionException("Mismatched char",
+                         scanner_->getFilename(),
+                         scanner_->getLine(),
+                         scanner_->getColumn())
+  , mismatchType(matchNot ? NOT_SET : SET)
+  , foundChar(c)
+  , set(set_)
+  , scanner(scanner_)
+{
+}
+
+/** Exception carrying the caller-supplied text s.
+ *  NOTE(review): the line argument is accepted but never used, and
+ *  mismatchType is not set here either - confirm whether that is intended.
+ */
+MismatchedCharException::MismatchedCharException(
+    const ANTLR_USE_NAMESPACE(std)string& s,
+    int line
+) : RecognitionException(s)
+{
+}
+
+/**
+ * Returns the error message without file/line/column information.
+ * The wording depends on mismatchType; any value not handled below falls
+ * back to RecognitionException::getMessage().
+ */
+ANTLR_USE_NAMESPACE(std)string MismatchedCharException::getMessage() const
+{
+    ANTLR_USE_NAMESPACE(std)string s;
+
+    switch (mismatchType) {
+    case CHAR :
+        s += "expecting '" + charName(expecting) + "', found '" + charName(foundChar) + "'";
+        break;
+    case NOT_CHAR :
+        s += "expecting anything but '" + charName(expecting) + "'; got it anyway";
+        break;
+    case RANGE :
+        s += "expecting token in range: '" + charName(expecting) + "'..'" + charName(upper) + "', found '" + charName(foundChar) + "'";
+        break;
+    case NOT_RANGE :
+        // The opening quote before the lower bound was missing, leaving the
+        // quotes unbalanced; the text now matches the RANGE form.
+        s += "expecting token NOT in range: '" + charName(expecting) + "'..'" + charName(upper) + "', found '" + charName(foundChar) + "'";
+        break;
+    case SET :
+    case NOT_SET :
+        {
+            s += ANTLR_USE_NAMESPACE(std)string("expecting ") + (mismatchType == NOT_SET ? "NOT " : "") + "one of (";
+            ANTLR_USE_NAMESPACE(std)vector<int> elems = set.toArray();
+            for (int i = 0; i < (int) elems.size(); i++) {
+                s += " '";
+                s += charName(elems[i]);
+                s += "'";
+            }
+            s += "), found '" + charName(foundChar) + "'";
+        }
+        break;
+    default :
+        s += RecognitionException::getMessage();
+        break;
+    }
+
+    return s;
+}
+
+// Out-of-class definitions for the static const members, skipped when
+// NO_STATIC_CONSTS is defined.
+#ifndef NO_STATIC_CONSTS
+const int MismatchedCharException::CHAR;
+const int MismatchedCharException::NOT_CHAR;
+const int MismatchedCharException::RANGE;
+const int MismatchedCharException::NOT_RANGE;
+const int MismatchedCharException::SET;
+const int MismatchedCharException::NOT_SET;
+#endif
+
+ANTLR_END_NAMESPACE
diff --git a/poxml/antlr/src/MismatchedTokenException.cpp b/poxml/antlr/src/MismatchedTokenException.cpp new file mode 100644 index 00000000..b8b10808 --- /dev/null +++ b/poxml/antlr/src/MismatchedTokenException.cpp @@ -0,0 +1,223 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code.
As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/MismatchedTokenException.hpp" +#include "antlr/String.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +MismatchedTokenException::MismatchedTokenException() +: RecognitionException("Mismatched Token: expecting any AST node","<AST>",1) +, token(0) +, node(nullASTptr) +{ +} + +// Expected range / not range +MismatchedTokenException::MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefAST node_, + int lower, + int upper_, + bool matchNot +) : RecognitionException("Mismatched Token") + , tokenNames(tokenNames_) + , token(0) + , node(node_) + , tokenText( (node_ ? node_->toString(): ANTLR_USE_NAMESPACE(std)string("<empty tree>")) ) + , mismatchType(matchNot ? NOT_RANGE : RANGE) + , expecting(lower) + , upper(upper_) +{ + fileName = "<AST>"; +} + +// Expected token / not token +MismatchedTokenException::MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefAST node_, + int expecting_, + bool matchNot +) : RecognitionException("Mismatched Token") + , tokenNames(tokenNames_) + , token(0) + , node(node_) + , tokenText( (node_ ? node_->toString(): ANTLR_USE_NAMESPACE(std)string("<empty tree>")) ) + , mismatchType(matchNot ? 
NOT_TOKEN : TOKEN) + , expecting(expecting_) +{ + fileName = "<AST>"; +} + +// Expected BitSet / not BitSet +MismatchedTokenException::MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefAST node_, + BitSet set_, + bool matchNot +) : RecognitionException("Mismatched Token") + , tokenNames(tokenNames_) + , token(0) + , node(node_) + , tokenText( (node_ ? node_->toString(): ANTLR_USE_NAMESPACE(std)string("<empty tree>")) ) + , mismatchType(matchNot ? NOT_SET : SET) + , set(set_) +{ + fileName = "<AST>"; +} + +// Expected range / not range +MismatchedTokenException::MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefToken token_, + int lower, + int upper_, + bool matchNot, + const ANTLR_USE_NAMESPACE(std)string& fileName_ +) : RecognitionException("Mismatched Token",fileName_,token_->getLine(),token_->getColumn()) + , tokenNames(tokenNames_) + , token(token_) + , node(nullASTptr) + , tokenText(token_->getText()) + , mismatchType(matchNot ? NOT_RANGE : RANGE) + , expecting(lower) + , upper(upper_) +{ +} + +// Expected token / not token +MismatchedTokenException::MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefToken token_, + int expecting_, + bool matchNot, + const ANTLR_USE_NAMESPACE(std)string& fileName_ +) : RecognitionException("Mismatched Token",fileName_,token_->getLine(),token_->getColumn()) + , tokenNames(tokenNames_) + , token(token_) + , node(nullASTptr) + , tokenText(token_->getText()) + , mismatchType(matchNot ? 
NOT_TOKEN : TOKEN) + , expecting(expecting_) +{ +} + +// Expected BitSet / not BitSet +MismatchedTokenException::MismatchedTokenException( + const ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string>& tokenNames_, + RefToken token_, + BitSet set_, + bool matchNot, + const ANTLR_USE_NAMESPACE(std)string& fileName_ +) : RecognitionException("Mismatched Token",fileName_,token_->getLine(),token_->getColumn()) + , tokenNames(tokenNames_) + , token(token_) + , node(nullASTptr) + , tokenText(token_->getText()) + , mismatchType(matchNot ? NOT_SET : SET) + , set(set_) +{ +} + +// deprecated As of ANTLR 2.7.0 +ANTLR_USE_NAMESPACE(std)string MismatchedTokenException::getErrorMessage() const +{ + return getMessage(); +} + +ANTLR_USE_NAMESPACE(std)string MismatchedTokenException::getMessage() const +{ + ANTLR_USE_NAMESPACE(std)string s; + switch (mismatchType) { + case TOKEN: + s += "expecting " + tokenName(expecting) + ", found '" + tokenText + "'"; + break; + case NOT_TOKEN: + s += "expecting anything but " + tokenName(expecting) + "; got it anyway"; + break; + case RANGE: + s += "expecting token in range: " + tokenName(expecting) + ".." + tokenName(upper) + ", found '" + tokenText + "'"; + break; + case NOT_RANGE: + s += "expecting token NOT in range: " + tokenName(expecting) + ".." + tokenName(upper) + ", found '" + tokenText + "'"; + break; + case SET: + case NOT_SET: + { + s += ANTLR_USE_NAMESPACE(std)string("expecting ") + (mismatchType == NOT_SET ? 
"NOT " : "") + "one of ("; + ANTLR_USE_NAMESPACE(std)vector<int> elems = set.toArray(); + for (int i = 0; i < (int) elems.size(); i++) + { + s += " "; + s += tokenName(elems[i]); + } + s += "), found '" + tokenText + "'"; + } + break; + default: + s = RecognitionException::getMessage(); + break; + } + return s; +} + +ANTLR_USE_NAMESPACE(std)string MismatchedTokenException::tokenName(int tokenType) const +{ + if (tokenType == Token::INVALID_TYPE) { + return "<Set of tokens>"; + } + else if (tokenType < 0 || tokenType >= (int) tokenNames.size()) { + return ANTLR_USE_NAMESPACE(std)string("<") + tokenType + ">"; + } + else { + return tokenNames[tokenType]; + } +} + +ANTLR_USE_NAMESPACE(std)string MismatchedTokenException::toString() const { + if (token) { + return getFileLineString() + getMessage(); + } + return getMessage(); +} + +#ifndef NO_STATIC_CONSTS +const int MismatchedTokenException::TOKEN; +const int MismatchedTokenException::NOT_TOKEN; +const int MismatchedTokenException::RANGE; +const int MismatchedTokenException::NOT_RANGE; +const int MismatchedTokenException::SET; +const int MismatchedTokenException::NOT_SET; +#endif + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/NoViableAltException.cpp b/poxml/antlr/src/NoViableAltException.cpp new file mode 100644 index 00000000..433f4325 --- /dev/null +++ b/poxml/antlr/src/NoViableAltException.cpp @@ -0,0 +1,82 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. 
By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/NoViableAltException.hpp" +#include "antlr/String.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +NoViableAltException::NoViableAltException(RefAST t) +: RecognitionException("NoViableAlt") +, token(0) +, node(t) +{ + fileName = "<AST>"; +} + +NoViableAltException::NoViableAltException(RefToken t,const ANTLR_USE_NAMESPACE(std)string& fileName_) +: RecognitionException("NoViableAlt") // line ")+t.getLine()+" token is "+t.getText()) +, token(t) +, node(nullASTptr) +{ + line = t->getLine(); /* t is dereferenced unconditionally: callers must pass a non-null token */ + column = t->getColumn(); + fileName = fileName_; +} + +ANTLR_USE_NAMESPACE(std)string NoViableAltException::getErrorMessage() const +{ + return getMessage(); +} + +ANTLR_USE_NAMESPACE(std)string NoViableAltException::getMessage() const +{ + if (token) + return ANTLR_USE_NAMESPACE(std)string("unexpected token: ")+token->getText(); + + // must be a tree parser error if token==null + if (!node) { + return "unexpected end of subtree"; + } + return ANTLR_USE_NAMESPACE(std)string("unexpected AST node: ")+node->toString(); +} + +ANTLR_USE_NAMESPACE(std)string 
NoViableAltException::toString() const +{ + if (token) + return getFileLineString()+getMessage(); + else + return getMessage(); +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/NoViableAltForCharException.cpp b/poxml/antlr/src/NoViableAltForCharException.cpp new file mode 100644 index 00000000..2ff9120f --- /dev/null +++ b/poxml/antlr/src/NoViableAltForCharException.cpp @@ -0,0 +1,71 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Institute + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Institute + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/NoViableAltForCharException.hpp" +#include "antlr/String.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +NoViableAltForCharException::NoViableAltForCharException(int c, CharScanner* scanner) +: RecognitionException("NoViableAlt") +, foundChar(c) +{ + line = scanner->getLine(); + fileName = scanner->getFilename(); +} + +NoViableAltForCharException::NoViableAltForCharException(int c, const ANTLR_USE_NAMESPACE(std)string& fileName_, int line_) +: RecognitionException("NoViableAlt") +, foundChar(c) +{ + line = line_; + fileName = fileName_; +} + +/** + * @deprecated As of ANTLR 2.7.0 + */ +ANTLR_USE_NAMESPACE(std)string NoViableAltForCharException::getErrorMessage() const +{ + return getMessage(); +} + +/** + * Returns a clean error message (no line number/column information) + */ +ANTLR_USE_NAMESPACE(std)string NoViableAltForCharException::getMessage() const +{ + return ANTLR_USE_NAMESPACE(std)string("unexpected char: ")+charName(foundChar); +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/Parser.cpp b/poxml/antlr/src/Parser.cpp new file mode 100644 index 00000000..5a0388d4 --- /dev/null +++ b/poxml/antlr/src/Parser.cpp @@ -0,0 +1,304 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. 
However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/Parser.hpp" + +#include "antlr/BitSet.hpp" +#include "antlr/TokenBuffer.hpp" +#include "antlr/MismatchedTokenException.hpp" +//#include "antlr/ASTFactory.hpp" +#include <iostream> +#include <stdlib.h> + +ANTLR_BEGIN_NAMESPACE(antlr) +ANTLR_C_USING(exit) + +/**A generic ANTLR parser (LL(k) for k>=1) containing a bunch of + * utility routines useful at any lookahead depth. We distinguish between + * the LL(1) and LL(k) parsers because of efficiency. This may not be + * necessary in the near future. + * + * Each parser object contains the state of the parse including a lookahead + * cache (the form of which is determined by the subclass), whether or + * not the parser is in guess mode, where tokens come from, etc... + * + * <p> + * During <b>guess</b> mode, the current lookahead token(s) and token type(s) + * cache must be saved because the token stream may not have been informed + * to save the token (via <tt>mark</tt>) before the <tt>try</tt> block. 
+ * Guessing is started by: + * <ol> + * <li>saving the lookahead cache. + * <li>marking the current position in the TokenBuffer. + * <li>increasing the guessing level. + * </ol> + * + * After guessing, the parser state is restored by: + * <ol> + * <li>restoring the lookahead cache. + * <li>rewinding the TokenBuffer. + * <li>decreasing the guessing level. + * </ol> + * + * @see antlr.Token + * @see antlr.TokenBuffer + * @see antlr.TokenStream + * @see antlr.LL1Parser + * @see antlr.LLkParser + */ + +bool DEBUG_PARSER=false; + +Parser::Parser(TokenBuffer& input) +: inputState(new ParserInputState(input)), traceDepth(0) +{ +} + +Parser::Parser(TokenBuffer* input) +: inputState(new ParserInputState(input)), traceDepth(0) +{ +} + +Parser::Parser(const ParserSharedInputState& state) +: inputState(state), traceDepth(0) +{ +} + +Parser::~Parser() +{ +} + +void Parser::setTokenNames(const char** tokenNames_) +{ + while (*tokenNames_) { + tokenNames.push_back(*(tokenNames_++)); + } +} + +/** Consume tokens until one matches the given token */ +void Parser::consumeUntil(int tokenType) +{ + while (LA(1) != Token::EOF_TYPE && LA(1) != tokenType) + consume(); +} + +/** Consume tokens until one matches the given token set */ +void Parser::consumeUntil(const BitSet& set) +{ + while (LA(1) != Token::EOF_TYPE && !set.member(LA(1))) + consume(); +} + +/** Get the AST return value squirreled away in the parser */ +RefAST Parser::getAST() +{ + return returnAST; +} + +ASTFactory& Parser::getASTFactory() +{ + return astFactory; +} + +ANTLR_USE_NAMESPACE(std)string Parser::getFilename() const +{ + return inputState->filename; +} + +ParserSharedInputState Parser::getInputState() const +{ + return inputState; +} + +ANTLR_USE_NAMESPACE(std)string Parser::getTokenName(int num) const +{ + return tokenNames[num]; +} + +ANTLR_USE_NAMESPACE(std)vector<ANTLR_USE_NAMESPACE(std)string> Parser::getTokenNames() const +{ + return tokenNames; +} + +// Forwarded to TokenBuffer +int Parser::mark() +{ + 
return inputState->getInput().mark(); +} + +/**Make sure current lookahead symbol matches token type <tt>t</tt>. + * Throw an exception upon mismatch, which is caught by either the + * error handler or by the syntactic predicate. + */ +void Parser::match(int t) +{ + if ( DEBUG_PARSER ) + { + traceIndent(); + ANTLR_USE_NAMESPACE(std)cout << "enter match(" << t << ") with LA(1)=" << LA(1) << ANTLR_USE_NAMESPACE(std)endl; + } + if ( LA(1)!=t ) { + if ( DEBUG_PARSER ) + { + traceIndent(); + ANTLR_USE_NAMESPACE(std)cout << "token mismatch: " << LA(1) << "!=" << t << ANTLR_USE_NAMESPACE(std)endl; + } + throw MismatchedTokenException(tokenNames, LT(1), t, false, getFilename()); + } else { + // mark token as consumed -- fetch next token deferred until LA/LT + consume(); + } +} + +/**Make sure current lookahead symbol matches the given set. + * Throw an exception upon mismatch, which is caught by either the + * error handler or by the syntactic predicate. + */ +void Parser::match(const BitSet& b) +{ + if ( DEBUG_PARSER ) + { + traceIndent(); + ANTLR_USE_NAMESPACE(std)cout << "enter match(" << "bitset" /*b.toString()*/ + << ") with LA(1)=" << LA(1) << ANTLR_USE_NAMESPACE(std)endl; + } + if ( !b.member(LA(1)) ) { + if ( DEBUG_PARSER ) + { + traceIndent(); + ANTLR_USE_NAMESPACE(std)cout << "token mismatch: " << LA(1) << " not member of " + << "bitset" /*b.toString()*/ << ANTLR_USE_NAMESPACE(std)endl; + } + throw MismatchedTokenException(tokenNames, LT(1), b, false, getFilename()); + } else { + // mark token as consumed -- fetch next token deferred until LA/LT + consume(); + } +} + +void Parser::matchNot(int t) +{ + if ( LA(1)==t ) { + // Throws inverted-sense exception + throw MismatchedTokenException(tokenNames, LT(1), t, true, getFilename()); + } else { + // mark token as consumed -- fetch next token deferred until LA/LT + consume(); + } +} + +void Parser::panic() +{ + ANTLR_USE_NAMESPACE(std)cerr << "Parser: panic" << ANTLR_USE_NAMESPACE(std)endl; + exit(1); +} + +/** Parser 
error-reporting function can be overridden in subclass */ +void Parser::reportError(const RecognitionException& ex) +{ + ANTLR_USE_NAMESPACE(std)cerr << ex.toString().c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +/** Parser error-reporting function can be overridden in subclass */ +void Parser::reportError(const ANTLR_USE_NAMESPACE(std)string& s) +{ + if ( getFilename().empty() ) + ANTLR_USE_NAMESPACE(std)cerr << "error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; + else + ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +/** Parser warning-reporting function can be overridden in subclass */ +void Parser::reportWarning(const ANTLR_USE_NAMESPACE(std)string& s) +{ + if ( getFilename().empty() ) + ANTLR_USE_NAMESPACE(std)cerr << "warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; + else + ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl; +} + +void Parser::rewind(int pos) +{ + inputState->getInput().rewind(pos); +} + +/** Set the object used to generate ASTs */ +// void setASTFactory(ASTFactory astFactory_); + +/** Specify the type of node to create during tree building */ +void Parser::setASTNodeFactory(ASTFactory::factory_type factory) +{ + astFactory.setASTNodeFactory(factory); +} + +void Parser::setFilename(const ANTLR_USE_NAMESPACE(std)string& f) +{ + inputState->filename = f; +} + +void Parser::setInputState(ParserSharedInputState state) +{ + inputState = state; +} + +/** Set or change the input token buffer */ +// void setTokenBuffer(TokenBuffer<Token>* t); + +void Parser::traceIndent() +{ + for( int i = 0; i < traceDepth; i++ ) + ANTLR_USE_NAMESPACE(std)cout << " "; +} + +void Parser::traceIn(const ANTLR_USE_NAMESPACE(std)string& rname) +{ + traceDepth++; + + for( int i = 0; i < traceDepth; i++ ) + ANTLR_USE_NAMESPACE(std)cout << " "; + + ANTLR_USE_NAMESPACE(std)cout << "> " << rname.c_str() << "; LA(1)==" << 
LT(1)->getText().c_str() << + ((inputState->guessing>0)?" [guessing]":"") << ANTLR_USE_NAMESPACE(std)endl; +} + +void Parser::traceOut(const ANTLR_USE_NAMESPACE(std)string& rname) +{ + for( int i = 0; i < traceDepth; i++ ) + ANTLR_USE_NAMESPACE(std)cout << " "; + + ANTLR_USE_NAMESPACE(std)cout << "< " << rname.c_str() << "; LA(1)==" << LT(1)->getText().c_str() << + ((inputState->guessing>0)?" [guessing]":"") << ANTLR_USE_NAMESPACE(std)endl; + + traceDepth--; +} + +ANTLR_END_NAMESPACE diff --git a/poxml/antlr/src/ParserSharedInputState.cpp b/poxml/antlr/src/ParserSharedInputState.cpp new file mode 100644 index 00000000..102aba87 --- /dev/null +++ b/poxml/antlr/src/ParserSharedInputState.cpp @@ -0,0 +1,37 @@ +#include "antlr/ParserSharedInputState.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** This object contains the data associated with an + * input stream of tokens. Multiple parsers + * share a single ParserSharedInputState to parse + * the same stream of tokens. + */ + +ParserInputState::ParserInputState(TokenBuffer* input_) +: guessing(0) +, input(input_) +, inputResponsible(true) +{ +} + +ParserInputState::ParserInputState(TokenBuffer& input_) +: guessing(0) +, input(&input_) +, inputResponsible(false) +{ +} + +ParserInputState::~ParserInputState() +{ + if (inputResponsible) + delete input; +} + +TokenBuffer& ParserInputState::getInput() +{ + return *input; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/RecognitionException.cpp b/poxml/antlr/src/RecognitionException.cpp new file mode 100644 index 00000000..1d1bd53d --- /dev/null +++ b/poxml/antlr/src/RecognitionException.cpp @@ -0,0 +1,87 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. 
An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/RecognitionException.hpp" +#include "antlr/String.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +RecognitionException::RecognitionException() +: ANTLRException("parsing error"), line(1), column(1) +{} + +RecognitionException::RecognitionException(const ANTLR_USE_NAMESPACE(std)string& s) +: ANTLRException(s) +{} + +RecognitionException::RecognitionException(const ANTLR_USE_NAMESPACE(std)string& s,const ANTLR_USE_NAMESPACE(std)string& fileName_,int line_) +: ANTLRException(s), fileName(fileName_), line(line_) +{} + +RecognitionException::RecognitionException(const ANTLR_USE_NAMESPACE(std)string& s,const ANTLR_USE_NAMESPACE(std)string& fileName_,int line_,int column_) +: ANTLRException(s), 
fileName(fileName_), line(line_), column(column_) +{} + +int RecognitionException::getColumn() const +{ + return column; +} + +ANTLR_USE_NAMESPACE(std)string RecognitionException::getErrorMessage() const +{ + return getMessage(); +} + +ANTLR_USE_NAMESPACE(std)string RecognitionException::getFileLineString() const +{ + if ( fileName.length() ) + return fileName+": "+line+": "; + else + return ANTLR_USE_NAMESPACE(std)string("line ")+line+": "; +} + +ANTLR_USE_NAMESPACE(std)string RecognitionException::getFilename() const +{ + return fileName; +} + +int RecognitionException::getLine() const +{ + return line; +} + +ANTLR_USE_NAMESPACE(std)string RecognitionException::toString() const +{ + return getFileLineString()+getMessage(); +} + +ANTLR_END_NAMESPACE diff --git a/poxml/antlr/src/String.cpp b/poxml/antlr/src/String.cpp new file mode 100644 index 00000000..6d9df7a5 --- /dev/null +++ b/poxml/antlr/src/String.cpp @@ -0,0 +1,61 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. 
As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/String.hpp" + +#ifdef HAS_NOT_CSTDIO_H +#include <stdio.h> +#else +#include <cstdio> +#endif + +ANTLR_BEGIN_NAMESPACE(antlr) +ANTLR_C_USING(sprintf) + +ANTLR_USE_NAMESPACE(std)string operator+(const ANTLR_USE_NAMESPACE(std)string& lhs,int rhs) +{ + char tmp[100]; + sprintf(tmp,"%d",rhs); + return lhs+tmp; +} + +ANTLR_USE_NAMESPACE(std)string charName(int ch) +{ + if (ch == EOF) + return "EOF"; + else { + return ANTLR_USE_NAMESPACE(std)string(1, static_cast<char>(ch)); + } +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/Token.cpp b/poxml/antlr/src/Token.cpp new file mode 100644 index 00000000..f307774f --- /dev/null +++ b/poxml/antlr/src/Token.cpp @@ -0,0 +1,108 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... 
If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/Token.hpp" +#include "antlr/String.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +RefToken Token::badToken(new Token(Token::INVALID_TYPE, "<no text>")); + +Token::Token() : type(INVALID_TYPE) +{ +} + +Token::Token(int t) : type(t) +{ +} + +Token::Token(int t, const ANTLR_USE_NAMESPACE(std)string& txt) + : type(t) +{ + type=t; + setText(txt); +} + +int Token::getColumn() const +{ + return 0; +} + +int Token::getLine() const +{ + return 0; +} + +ANTLR_USE_NAMESPACE(std)string Token::getText() const +{ + return "<no text>"; +} + +int Token::getType() const +{ + return type; +} + +void Token::setColumn(int c) +{} + +void Token::setLine(int l) +{} + +void Token::setText(const ANTLR_USE_NAMESPACE(std)string& t) +{} + +void Token::setType(int t) +{ + type=t; +} + +ANTLR_USE_NAMESPACE(std)string Token::toString() const +{ + return "[\""+getText()+"\",<"+type+">]"; +} + +Token::~Token() +{} + +RefToken nullToken; + +#ifndef NO_STATIC_CONSTS +const int Token::MIN_USER_TYPE; +const int Token::NULL_TREE_LOOKAHEAD; +const int Token::INVALID_TYPE; +const int Token::EOF_TYPE; +const int Token::SKIP; +#endif + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/TokenBuffer.cpp b/poxml/antlr/src/TokenBuffer.cpp new file mode 100644 index 00000000..ded5df9b --- /dev/null +++ b/poxml/antlr/src/TokenBuffer.cpp @@ 
-0,0 +1,107 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. + * <p> + * The ANTLR gang: + * @version ANTLR 2.6.0 MageLang Insitute, 1998 + * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a> + * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a> + * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a> + */ + +#include "antlr/TokenBuffer.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/**A Stream of Token objects fed to the parser from a TokenStream that can + * be rewound via mark()/rewind() methods. + * <p> + * A dynamic array is used to buffer up all the input tokens. Normally, + * "k" tokens are stored in the buffer. More tokens may be stored during + * guess mode (testing syntactic predicate), or when LT(i>k) is referenced. + * Consumption of tokens is deferred. 
In other words, reading the next + * token is not done by consume(), but deferred until needed by LA or LT. + * <p> + * + * @see antlr.Token + * @see antlr.TokenStream + * @see antlr.TokenQueue + */ + +/** Create a token buffer */ +TokenBuffer::TokenBuffer(TokenStream& input_) +: input(input_) +{ nMarkers=0; markerOffset=0; numToConsume=0; } + +/** Mark another token for deferred consumption */ +void TokenBuffer::consume() +{ numToConsume++; } + +/** Ensure that the token buffer is sufficiently full */ +void TokenBuffer::fill(int amount) +{ + syncConsume(); + // Fill the buffer sufficiently to hold needed tokens + while (queue.entries() < amount + markerOffset) { + // Append the next token + queue.append(input.nextToken()); + } +} + +/** Get a lookahead token value */ +int TokenBuffer::LA(int i) +{ + fill(i); + return queue.elementAt(markerOffset+i-1)->type; +} + +/** Get a lookahead token */ +RefToken TokenBuffer::LT(int i) +{ + fill(i); + return queue.elementAt(markerOffset+i-1); +} + +/**Return an integer marker that can be used to rewind the buffer to + * its current state. + */ +int TokenBuffer::mark() +{ + syncConsume(); + nMarkers++; + return markerOffset; +} + +/**Rewind the token buffer to a marker. + * @param mark Marker returned previously from mark() + */ +void TokenBuffer::rewind(int mark) +{ + syncConsume(); + markerOffset=mark; + nMarkers--; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/TokenStreamBasicFilter.cpp b/poxml/antlr/src/TokenStreamBasicFilter.cpp new file mode 100644 index 00000000..71257f46 --- /dev/null +++ b/poxml/antlr/src/TokenStreamBasicFilter.cpp @@ -0,0 +1,34 @@ +#include "antlr/TokenStreamBasicFilter.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** This object is a TokenStream that passes through all + * tokens except for those that you tell it to discard. + * There is no buffering of the tokens. 
+ */ +TokenStreamBasicFilter::TokenStreamBasicFilter(TokenStream& input_) +: input(&input_) +{ +} + +void TokenStreamBasicFilter::discard(int ttype) +{ + discardMask.add(ttype); +} + +void TokenStreamBasicFilter::discard(const BitSet& mask) +{ + discardMask = mask; +} + +RefToken TokenStreamBasicFilter::nextToken() +{ + RefToken tok = input->nextToken(); + while ( tok && discardMask.member(tok->getType()) ) { + tok = input->nextToken(); + } + return tok; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/TokenStreamHiddenTokenFilter.cpp b/poxml/antlr/src/TokenStreamHiddenTokenFilter.cpp new file mode 100644 index 00000000..827ca382 --- /dev/null +++ b/poxml/antlr/src/TokenStreamHiddenTokenFilter.cpp @@ -0,0 +1,146 @@ +#include "antlr/TokenStreamHiddenTokenFilter.hpp" +#include "antlr/CommonHiddenStreamToken.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/**This object filters a token stream coming from a lexer + * or another TokenStream so that only certain token channels + * get transmitted to the parser. + * + * Any of the channels can be filtered off as "hidden" channels whose + * tokens can be accessed from the parser. 
+ */ + +TokenStreamHiddenTokenFilter::TokenStreamHiddenTokenFilter(TokenStream& input) +: TokenStreamBasicFilter(input) +{ +} + +void TokenStreamHiddenTokenFilter::consume() +{ + nextMonitoredToken = input->nextToken(); +} + +void TokenStreamHiddenTokenFilter::consumeFirst() +{ + consume(); + + // Handle situation where hidden or discarded tokens + // appear first in input stream + RefToken p; + // while hidden or discarded scarf tokens + while ( hideMask.member(LA(1)->getType()) || discardMask.member(LA(1)->getType()) ) { + if ( hideMask.member(LA(1)->getType()) ) { + if ( !p ) { + p = LA(1); + } + else { + static_cast<CommonHiddenStreamToken*>(p.get())->setHiddenAfter(LA(1)); + static_cast<CommonHiddenStreamToken*>(LA(1).get())->setHiddenBefore(p); // double-link + p = LA(1); + } + lastHiddenToken = p; + if (!firstHidden) + firstHidden = p; // record hidden token if first + } + consume(); + } +} + +BitSet TokenStreamHiddenTokenFilter::getDiscardMask() const +{ + return discardMask; +} + +/** Return a ptr to the hidden token appearing immediately after + * token t in the input stream. + */ +RefToken TokenStreamHiddenTokenFilter::getHiddenAfter(RefToken t) +{ + return static_cast<CommonHiddenStreamToken*>(t.get())->getHiddenAfter(); +} + +/** Return a ptr to the hidden token appearing immediately before + * token t in the input stream. + */ +RefToken TokenStreamHiddenTokenFilter::getHiddenBefore(RefToken t) +{ + return static_cast<CommonHiddenStreamToken*>(t.get())->getHiddenBefore(); +} + +BitSet TokenStreamHiddenTokenFilter::getHideMask() const +{ + return hideMask; +} + +/** Return the first hidden token if one appears + * before any monitored token. 
+ */ +RefToken TokenStreamHiddenTokenFilter::getInitialHiddenToken() +{ + return firstHidden; +} + +void TokenStreamHiddenTokenFilter::hide(int m) +{ + hideMask.add(m); +} + +void TokenStreamHiddenTokenFilter::hide(const BitSet& mask) +{ + hideMask = mask; +} + +RefToken TokenStreamHiddenTokenFilter::LA(int i) +{ + return nextMonitoredToken; +} + +/** Return the next monitored token. +* Test the token following the monitored token. +* If following is another monitored token, save it +* for the next invocation of nextToken (like a single +* lookahead token) and return it then. +* If following is unmonitored, nondiscarded (hidden) +* channel token, add it to the monitored token. +* +* Note: EOF must be a monitored Token. +*/ +RefToken TokenStreamHiddenTokenFilter::nextToken() +{ + // handle an initial condition; don't want to get lookahead + // token of this splitter until first call to nextToken + if ( !LA(1) ) { + consumeFirst(); + } + + // we always consume hidden tokens after monitored, thus, + // upon entry LA(1) is a monitored token. + RefToken monitored = LA(1); + // point to hidden tokens found during last invocation + static_cast<CommonHiddenStreamToken*>(monitored.get())->setHiddenBefore(lastHiddenToken); + lastHiddenToken = nullToken; + + // Look for hidden tokens, hook them into list emanating + // from the monitored tokens. 
+ consume(); + RefToken p = monitored; + // while hidden or discarded scarf tokens + while ( hideMask.member(LA(1)->getType()) || discardMask.member(LA(1)->getType()) ) { + if ( hideMask.member(LA(1)->getType()) ) { + // attach the hidden token to the monitored in a chain + // link forwards + static_cast<CommonHiddenStreamToken*>(p.get())->setHiddenAfter(LA(1)); + // link backwards + if (p != monitored) { //hidden cannot point to monitored tokens + static_cast<CommonHiddenStreamToken*>(LA(1).get())->setHiddenBefore(p); + } + p = lastHiddenToken = LA(1); + } + consume(); + } + return monitored; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/TokenStreamSelector.cpp b/poxml/antlr/src/TokenStreamSelector.cpp new file mode 100644 index 00000000..2e6527a8 --- /dev/null +++ b/poxml/antlr/src/TokenStreamSelector.cpp @@ -0,0 +1,97 @@ +#include "antlr/TokenStreamSelector.hpp" +#include "antlr/TokenStreamRetryException.hpp" + +ANTLR_BEGIN_NAMESPACE(antlr) + +/** A token stream MUX (multiplexor) knows about n token streams + * and can multiplex them onto the same channel for use by token + * stream consumer like a parser. This is a way to have multiple + * lexers break up the same input stream for a single parser. + * Or, you can have multiple instances of the same lexer handle + * multiple input streams; this works great for includes. 
+ */ + +TokenStreamSelector::TokenStreamSelector() +: input(0) +{ +} + +TokenStreamSelector::~TokenStreamSelector() +{ +} + +void TokenStreamSelector::addInputStream(TokenStream* stream, const ANTLR_USE_NAMESPACE(std)string& key) +{ + inputStreamNames[key] = stream; +} + +TokenStream* TokenStreamSelector::getCurrentStream() const +{ + return input; +} + +TokenStream* TokenStreamSelector::getStream(const ANTLR_USE_NAMESPACE(std)string& sname) const +{ + inputStreamNames_coll::const_iterator i = inputStreamNames.find(sname); + if (i == inputStreamNames.end()) { + throw ANTLR_USE_NAMESPACE(std)string("TokenStream ")+sname+" not found"; + } + return (*i).second; +} + +RefToken TokenStreamSelector::nextToken() +{ + // keep looking for a token until you don't + // get a retry exception + for (;;) { + try { + return input->nextToken(); + } + catch (TokenStreamRetryException& r) { + // just retry "forever" + } + } +} + +TokenStream* TokenStreamSelector::pop() +{ + TokenStream* stream = streamStack.top(); + streamStack.pop(); + select(stream); + return stream; +} + +void TokenStreamSelector::push(TokenStream* stream) +{ + streamStack.push(input); + select(stream); +} + +void TokenStreamSelector::push(const ANTLR_USE_NAMESPACE(std)string& sname) +{ + streamStack.push(input); + select(sname); +} + +void TokenStreamSelector::retry() +{ + throw TokenStreamRetryException(); +} + +/** Set the stream without pushing old stream */ +void TokenStreamSelector::select(TokenStream* stream) +{ + input = stream; +} + +void TokenStreamSelector::select(const ANTLR_USE_NAMESPACE(std)string& sname) +{ + inputStreamNames_coll::const_iterator i = inputStreamNames.find(sname); + if (i == inputStreamNames.end()) { + throw ANTLR_USE_NAMESPACE(std)string("TokenStream ")+sname+" not found"; + } + input = (*i).second; +} + +ANTLR_END_NAMESPACE + diff --git a/poxml/antlr/src/TreeParser.cpp b/poxml/antlr/src/TreeParser.cpp new file mode 100644 index 00000000..6d302737 --- /dev/null +++ 
b/poxml/antlr/src/TreeParser.cpp @@ -0,0 +1,165 @@ +/** + * <b>SOFTWARE RIGHTS</b> + * <p> + * ANTLR 2.6.0 MageLang Insitute, 1998 + * <p> + * We reserve no legal rights to the ANTLR--it is fully in the + * public domain. An individual or company may do whatever + * they wish with source code distributed with ANTLR or the + * code generated by ANTLR, including the incorporation of + * ANTLR, or its output, into commerical software. + * <p> + * We encourage users to develop software with ANTLR. However, + * we do ask that credit is given to us for developing + * ANTLR. By "credit", we mean that if you use ANTLR or + * incorporate any source code into one of your programs + * (commercial product, research project, or otherwise) that + * you acknowledge this fact somewhere in the documentation, + * research report, etc... If you like ANTLR and have + * developed a nice tool with the output, please mention that + * you developed it using ANTLR. In addition, we ask that the + * headers remain intact in our source code. As long as these + * guidelines are kept, we expect to continue enhancing this + * system and expect to make other tools available as they are + * completed. 
+ * <p>
+ * The ANTLR gang:
+ * @version ANTLR 2.6.0 MageLang Insitute, 1998
+ * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
+ * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
+ * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a>
+ */
+#include "antlr/TreeParser.hpp"
+#include "antlr/ASTNULLType.hpp"
+#include "antlr/MismatchedTokenException.hpp"
+#include <iostream>
+#include <stdlib.h>
+
+ANTLR_BEGIN_NAMESPACE(antlr)
+ANTLR_C_USING(exit)
+
+/** Build a tree parser around a newly allocated TreeParserInputState;
+ * tracing starts at depth 0.
+ */
+TreeParser::TreeParser()
+: inputState(new TreeParserInputState()), traceDepth(0)
+{
+}
+
+/** Build a tree parser whose input state is initialised from state;
+ * tracing starts at depth 0.
+ */
+TreeParser::TreeParser(const TreeParserSharedInputState& state)
+: inputState(state), traceDepth(0)
+{
+}
+
+TreeParser::~TreeParser()
+{
+}
+
+/** Append the entries of tokenNames_ to tokenNames.
+ * tokenNames_ must point at an array terminated by a null pointer; names
+ * already stored are kept.
+ */
+void TreeParser::setTokenNames(const char** tokenNames_)
+{
+    // walk the table up to (not including) its null terminator
+    for (; *tokenNames_; ++tokenNames_)
+        tokenNames.push_back(*tokenNames_);
+}
+
+/** The AST Null object; the parsing cursor is set to this when
+ * it is found to be null. This way, we can test the
+ * token type of a node without having to have tests for null
+ * everywhere.
+ */
+RefAST TreeParser::ASTNULL(new ASTNULLType);
+
+/** Get the AST return value squirreled away in the parser */
+//RefAST getAST() const {
+// return returnAST;
+//}
+
+/** Require t to be a real node of type ttype.
+ * Throws MismatchedTokenException when t is null, is ASTNULL, or has a
+ * different type.
+ */
+void TreeParser::match(RefAST t, int ttype)
+{
+    // no usable node at the cursor
+    if (!t || t==ASTNULL)
+        throw MismatchedTokenException();
+    // a node, but of the wrong type
+    if (t->getType()!=ttype)
+        throw MismatchedTokenException();
+}
+
+/**Make sure current lookahead symbol matches the given set
+ * Throw an exception upon mismatch, which is caught by either the
+ * error handler or by the syntactic predicate.
+ */
+void TreeParser::match(RefAST t, const BitSet& b)
+{
+    // no usable node at the cursor
+    if ( !t || t==ASTNULL ) {
+        throw MismatchedTokenException();
+    }
+    // a node, but its type is not in the set
+    if ( !b.member(t->getType()) ) {
+        throw MismatchedTokenException();
+    }
+}
+
+/** Require t to be a real node whose type is anything but ttype.
+ * Throws MismatchedTokenException when t is null, is ASTNULL, or has
+ * exactly the type ttype.
+ */
+void TreeParser::matchNot(RefAST t, int ttype)
+{
+    //ANTLR_USE_NAMESPACE(std)cout << "match(" << ttype << "); cursor is " << t.toString() << ANTLR_USE_NAMESPACE(std)endl;
+    if ( !t || t==ASTNULL ) {
+        throw MismatchedTokenException();
+    }
+    if ( t->getType()==ttype ) {
+        throw MismatchedTokenException();
+    }
+}
+
+/** Print "TreeWalker: panic" to standard error and terminate the process
+ * with exit status 1.
+ */
+void TreeParser::panic()
+{
+    ANTLR_USE_NAMESPACE(std)cerr << "TreeWalker: panic";
+    ANTLR_USE_NAMESPACE(std)cerr << ANTLR_USE_NAMESPACE(std)endl;
+    exit(1);
+}
+
+/** Parser error-reporting function can be overridden in subclass.
+ * This version writes ex.toString() to standard error.
+ */
+void TreeParser::reportError(const RecognitionException& ex)
+{
+    ANTLR_USE_NAMESPACE(std)cerr
+        << ex.toString().c_str()
+        << ANTLR_USE_NAMESPACE(std)endl;
+}
+
+/** Parser error-reporting function can be overridden in subclass.
+ * This version writes s, prefixed with "error: ", to standard error.
+ */
+void TreeParser::reportError(const ANTLR_USE_NAMESPACE(std)string& s)
+{
+    ANTLR_USE_NAMESPACE(std)cerr
+        << "error: " << s.c_str()
+        << ANTLR_USE_NAMESPACE(std)endl;
+}
+
+/** Parser warning-reporting function can be overridden in subclass.
+ * This version writes s, prefixed with "warning: ", to standard error.
+ */
+void TreeParser::reportWarning(const ANTLR_USE_NAMESPACE(std)string& s)
+{
+    ANTLR_USE_NAMESPACE(std)cerr
+        << "warning: " << s.c_str()
+        << ANTLR_USE_NAMESPACE(std)endl;
+}
+
+/** Specify an object with support code (shared by
+ * Parser and TreeParser). Normally, the programmer
+ * does not play with this, using setASTNodeType instead.
+ */
+// void TreeParser::setASTFactory(ASTFactory f);
+
+/** Specify the type of node to create during tree building.
+ * The factory is handed on to astFactory.setASTNodeFactory().
+ */
+void TreeParser::setASTNodeFactory(ASTFactory::factory_type factory)
+{
+    astFactory.setASTNodeFactory(factory);
+}
+
+/** Procedure to write out an indent for traceIn and traceOut:
+ * one " " per level of traceDepth, on standard output.
+ */
+void TreeParser::traceIndent()
+{
+    for( int remaining = traceDepth; remaining > 0; --remaining )
+        ANTLR_USE_NAMESPACE(std)cout << " ";
+}
+
+/** Trace entry into rule rname: increase the trace depth, indent, then
+ * print "> rname(node)" on standard output, followed by " [guessing]"
+ * while inputState->guessing is positive. A null t is shown as "null".
+ */
+void TreeParser::traceIn(const ANTLR_USE_NAMESPACE(std)string& rname, RefAST t)
+{
+    ++traceDepth;
+    traceIndent();
+
+    ANTLR_USE_NAMESPACE(std)cout << "> " << rname.c_str() << "(";
+    if (t)
+        ANTLR_USE_NAMESPACE(std)cout << t->toString().c_str();
+    else
+        ANTLR_USE_NAMESPACE(std)cout << "null";
+    ANTLR_USE_NAMESPACE(std)cout << ")"
+        << ((inputState->guessing>0)?" [guessing]":"")
+        << ANTLR_USE_NAMESPACE(std)endl;
+}
+
+/** Trace exit from rule rname: indent, print "< rname(node)" on standard
+ * output (same format as traceIn), then decrease the trace depth.
+ */
+void TreeParser::traceOut(const ANTLR_USE_NAMESPACE(std)string& rname, RefAST t)
+{
+    traceIndent();
+
+    ANTLR_USE_NAMESPACE(std)cout << "< " << rname.c_str() << "(";
+    if (t)
+        ANTLR_USE_NAMESPACE(std)cout << t->toString().c_str();
+    else
+        ANTLR_USE_NAMESPACE(std)cout << "null";
+    ANTLR_USE_NAMESPACE(std)cout << ")"
+        << ((inputState->guessing>0)?" [guessing]":"")
+        << ANTLR_USE_NAMESPACE(std)endl;
+
+    --traceDepth;
+}
+
+ANTLR_END_NAMESPACE
diff --git a/poxml/antlr/src/TreeParserSharedInputState.cpp b/poxml/antlr/src/TreeParserSharedInputState.cpp new file mode 100644 index 00000000..89f1d5dc --- /dev/null +++ b/poxml/antlr/src/TreeParserSharedInputState.cpp @@ -0,0 +1,22 @@
+#include "antlr/TreeParserSharedInputState.hpp"
+
+ANTLR_BEGIN_NAMESPACE(antlr)
+
+/** This object contains the data associated with an
+ * input AST. Multiple parsers
+ * share a single TreeParserSharedInputState to parse
+ * the same tree or to have the parser walk multiple
+ * trees.
+ */
+
+TreeParserInputState::TreeParserInputState()
+: guessing(0)
+{
+}
+
+TreeParserInputState::~TreeParserInputState()
+{
+}
+
+ANTLR_END_NAMESPACE
+ |