summaryrefslogtreecommitdiffstats
path: root/poxml/antlr/src/CharScanner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'poxml/antlr/src/CharScanner.cpp')
-rw-r--r--poxml/antlr/src/CharScanner.cpp430
1 files changed, 430 insertions, 0 deletions
diff --git a/poxml/antlr/src/CharScanner.cpp b/poxml/antlr/src/CharScanner.cpp
new file mode 100644
index 00000000..ff40138d
--- /dev/null
+++ b/poxml/antlr/src/CharScanner.cpp
@@ -0,0 +1,430 @@
+/**
+ * <b>SOFTWARE RIGHTS</b>
+ * <p>
+ * ANTLR 2.6.0 MageLang Insitute, 1998
+ * <p>
+ * We reserve no legal rights to the ANTLR--it is fully in the
+ * public domain. An individual or company may do whatever
+ * they wish with source code distributed with ANTLR or the
+ * code generated by ANTLR, including the incorporation of
+ * ANTLR, or its output, into commerical software.
+ * <p>
+ * We encourage users to develop software with ANTLR. However,
+ * we do ask that credit is given to us for developing
+ * ANTLR. By "credit", we mean that if you use ANTLR or
+ * incorporate any source code into one of your programs
+ * (commercial product, research project, or otherwise) that
+ * you acknowledge this fact somewhere in the documentation,
+ * research report, etc... If you like ANTLR and have
+ * developed a nice tool with the output, please mention that
+ * you developed it using ANTLR. In addition, we ask that the
+ * headers remain intact in our source code. As long as these
+ * guidelines are kept, we expect to continue enhancing this
+ * system and expect to make other tools available as they are
+ * completed.
+ * <p>
+ * The ANTLR gang:
+ * @version ANTLR 2.6.0 MageLang Insitute, 1998
+ * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
+ * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
+ * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a>
+ */
+
+#include "antlr/CharScanner.hpp"
+#include "antlr/CommonToken.hpp"
+#include "antlr/MismatchedCharException.hpp"
+#include <map>
+
+#ifdef HAS_NOT_CCTYPE_H
+#include <ctype.h>
+#else
+#include <cctype>
+#endif
+
+#include <iostream>
+
+#ifdef HAS_NOT_CSTRING_H
+#include <string>
+#else
+#include <cstring>
+#endif
+#include <stdlib.h>
+
+ANTLR_BEGIN_NAMESPACE(antlr)
+ANTLR_C_USING(exit)
+ANTLR_C_USING(tolower)
+
+#ifdef ANTLR_REALLY_NO_STRCASECMP
+// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
+// on the mac has neither...
+inline int strcasecmp(const char *s1, const char *s2)
+{
+ while (true)
+ {
+ char c1 = tolower(*s1++),
+ c2 = tolower(*s2++);
+ if (c1 < c2) return -1;
+ if (c1 > c2) return 1;
+ if (c1 == 0) return 0;
+ }
+}
+#else
+#ifdef NO_STRCASECMP
+ANTLR_C_USING(stricmp)
+#else
+ANTLR_C_USING(strcasecmp)
+#endif
+#endif
+
+CharScannerLiteralsLess::CharScannerLiteralsLess(const CharScanner* theScanner)
+: scanner(theScanner)
+{}
+
+bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
+{
+ if (scanner->getCaseSensitiveLiterals()) {
+ return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
+ } else {
+#ifdef NO_STRCASECMP
+ return (stricmp(x.c_str(),y.c_str())<0);
+#else
+ return (strcasecmp(x.c_str(),y.c_str())<0);
+#endif
+ }
+}
+
+CharScanner::CharScanner(InputBuffer& cb)
+ : saveConsumedInput(true) //, caseSensitiveLiterals(true)
+ , literals(CharScannerLiteralsLess(this))
+ , inputState(new LexerInputState(cb))
+ , commitToPath(false)
+ , traceDepth(0)
+{
+ setTokenObjectFactory(&CommonToken::factory);
+}
+
+CharScanner::CharScanner(InputBuffer* cb)
+ : saveConsumedInput(true) //, caseSensitiveLiterals(true)
+ , literals(CharScannerLiteralsLess(this))
+ , inputState(new LexerInputState(cb))
+ , commitToPath(false)
+ , traceDepth(0)
+{
+ setTokenObjectFactory(&CommonToken::factory);
+}
+
+CharScanner::CharScanner(const LexerSharedInputState& state)
+ : saveConsumedInput(true) //, caseSensitiveLiterals(true)
+ , literals(CharScannerLiteralsLess(this))
+ , inputState(state)
+ , commitToPath(false)
+ , traceDepth(0)
+{
+ setTokenObjectFactory(&CommonToken::factory);
+}
+
+CharScanner::~CharScanner()
+{
+}
+
+void CharScanner::append(char c)
+{
+ if (saveConsumedInput) {
+ int l = text.length();
+ if ((l%256) == 0) text.reserve(l+256);
+ text.replace(l,0,&c,1);
+ }
+}
+
+void CharScanner::append(const ANTLR_USE_NAMESPACE(std)string& s)
+{
+ if (saveConsumedInput)
+ text+=s;
+}
+
+void CharScanner::commit()
+{
+ inputState->getInput().commit();
+}
+
+void CharScanner::consume()
+{
+ if (inputState->guessing == 0) {
+ int c = LA(1);
+ if (caseSensitive) {
+ append(c);
+ } else {
+ // use input.LA(), not LA(), to get original case
+ // CharScanner.LA() would toLower it.
+ append(inputState->getInput().LA(1));
+ }
+ if (c == '\t') {
+ tab();
+ }
+ else {
+ inputState->column++;
+ }
+ }
+ inputState->getInput().consume();
+}
+
+/** Consume chars until one matches the given char */
+void CharScanner::consumeUntil(int c)
+{
+ while (LA(1) != EOF_CHAR && LA(1) != c)
+ {
+ consume();
+ }
+}
+
+/** Consume chars until one matches the given set */
+void CharScanner::consumeUntil(const BitSet& set)
+{
+ while (LA(1) != EOF_CHAR && !set.member(LA(1))) {
+ consume();
+ }
+}
+
+bool CharScanner::getCaseSensitive() const
+{ return caseSensitive; }
+
+//bool CharScanner::getCaseSensitiveLiterals() const
+//{ return caseSensitiveLiterals; }
+
+int CharScanner::getColumn() const
+{ return inputState->column; }
+
+void CharScanner::setColumn(int c)
+{ inputState->column = c; }
+
+bool CharScanner::getCommitToPath() const
+{ return commitToPath; }
+
+const ANTLR_USE_NAMESPACE(std)string& CharScanner::getFilename() const
+{ return inputState->filename; }
+
+InputBuffer& CharScanner::getInputBuffer()
+{ return inputState->getInput(); }
+
+LexerSharedInputState CharScanner::getInputState()
+{ return inputState; }
+
+int CharScanner::getLine() const
+{ return inputState->line; }
+
+/** return a copy of the current text buffer */
+const ANTLR_USE_NAMESPACE(std)string& CharScanner::getText() const
+{ return text; }
+
+RefToken CharScanner::getTokenObject() const
+{ return _returnToken; }
+
+RefToken CharScanner::makeToken(int t)
+{
+ RefToken tok=tokenFactory();
+ tok->setType(t);
+ tok->setColumn(inputState->tokenStartColumn);
+ tok->setLine(inputState->tokenStartLine);
+ return tok;
+}
+
+int CharScanner::mark()
+{
+ return inputState->getInput().mark();
+}
+
+void CharScanner::match(int c)
+{
+ if ( LA(1) != c ) {
+ throw MismatchedCharException(LA(1),c,false,this);
+ }
+ consume();
+}
+
+void CharScanner::match(const BitSet& b)
+{
+ if (!b.member(LA(1))) {
+ throw MismatchedCharException(LA(1),b,false,this);
+ }
+ consume();
+}
+
+void CharScanner::match(const ANTLR_USE_NAMESPACE(std)string& s)
+{
+ int len = s.length();
+ for (int i=0; i<len; i++) {
+ if ( LA(1) != s[i] ) {
+ throw MismatchedCharException(LA(1),s[i],false,this);
+ }
+ consume();
+ }
+}
+
+void CharScanner::matchNot(int c)
+{
+ if ( LA(1) == c ) {
+ throw MismatchedCharException(LA(1),c,true,this);
+ }
+ consume();
+}
+
+void CharScanner::matchRange(int c1, int c2)
+{
+ if (LA(1)<c1 || LA(1)>c2) {
+ throw MismatchedCharException(LA(1),c1,c2,false,this);
+ }
+ consume();
+}
+
+void CharScanner::newline()
+{
+ ++inputState->line;
+ inputState->column=1;
+}
+
+/** advance the current column number by an appropriate amount.
+ * If you do not override this to specify how much to jump for
+ * a tab, then tabs are counted as one char. This method is
+ * called from consume().
+ */
+void CharScanner::tab() {
+ // update inputState->column as function of
+ // inputState->column and tab stops.
+ // For example, if tab stops are columns 1 and 5 etc...
+ // and column is 3, then add 2 to column.
+ ++inputState->column;
+}
+
+void CharScanner::panic()
+{
+ ANTLR_USE_NAMESPACE(std)cerr << "CharScanner: panic" << ANTLR_USE_NAMESPACE(std)endl;
+ exit(1);
+}
+
+void CharScanner::panic(const ANTLR_USE_NAMESPACE(std)string& s)
+{
+ ANTLR_USE_NAMESPACE(std)cerr << "CharScanner: panic: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
+ exit(1);
+}
+
+/** Report exception errors caught in nextToken() */
+void CharScanner::reportError(const RecognitionException& ex)
+{
+ ANTLR_USE_NAMESPACE(std)cerr << ex.toString().c_str() << ANTLR_USE_NAMESPACE(std)endl;
+}
+
+/** Parser error-reporting function can be overridden in subclass */
+void CharScanner::reportError(const ANTLR_USE_NAMESPACE(std)string& s)
+{
+ if (getFilename().empty())
+ ANTLR_USE_NAMESPACE(std)cerr << "error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
+ else
+ ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": error: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
+}
+
+/** Parser warning-reporting function can be overridden in subclass */
+void CharScanner::reportWarning(const ANTLR_USE_NAMESPACE(std)string& s)
+{
+ if (getFilename().empty())
+ ANTLR_USE_NAMESPACE(std)cerr << "warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
+ else
+ ANTLR_USE_NAMESPACE(std)cerr << getFilename().c_str() << ": warning: " << s.c_str() << ANTLR_USE_NAMESPACE(std)endl;
+}
+
+void CharScanner::resetText()
+{
+ text="";
+ inputState->tokenStartColumn = inputState->column;
+ inputState->tokenStartLine = inputState->line;
+}
+
+void CharScanner::rewind(int pos)
+{
+ inputState->getInput().rewind(pos);
+}
+
+void CharScanner::setCaseSensitive(bool t)
+{
+ caseSensitive = t;
+}
+
+void CharScanner::setCommitToPath(bool commit)
+{
+ commitToPath = commit;
+}
+
+void CharScanner::setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
+{ inputState->filename=f; }
+
+void CharScanner::setInputState(LexerSharedInputState state)
+{ inputState = state; }
+
+void CharScanner::setLine(int l)
+{ inputState->line=l; }
+
+void CharScanner::setText(const ANTLR_USE_NAMESPACE(std)string& s)
+{ text=s; }
+
+void CharScanner::setTokenObjectFactory(factory_type factory)
+{ tokenFactory=factory; }
+
+/** Test the token text against the literals table
+ * Override this method to perform a different literals test */
+int CharScanner::testLiteralsTable(int ttype) const
+{
+ ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
+ if (i != literals.end())
+ ttype = (*i).second;
+ return ttype;
+}
+
+/** Test the text passed in against the literals table
+ * Override this method to perform a different literals test
+ * This is used primarily when you want to test a portion of
+ * a token.
+ */
+int CharScanner::testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& text_, int ttype) const
+{
+ ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text_);
+ if (i != literals.end())
+ ttype = (*i).second;
+ return ttype;
+}
+
+/** Override this method to get more specific case handling */
+int CharScanner::toLower(int c) const
+{
+ return tolower(c);
+}
+
+void CharScanner::traceIndent()
+{
+ for( int i = 0; i < traceDepth; i++ )
+ ANTLR_USE_NAMESPACE(std)cout << " ";
+}
+
+void CharScanner::traceIn(const ANTLR_USE_NAMESPACE(std)string& rname)
+{
+ traceDepth++;
+ traceIndent();
+ ANTLR_USE_NAMESPACE(std)cout << "> lexer " << rname.c_str() << "; c==" << LA(1) << ANTLR_USE_NAMESPACE(std)endl;
+}
+
+void CharScanner::traceOut(const ANTLR_USE_NAMESPACE(std)string& rname)
+{
+ traceIndent();
+ ANTLR_USE_NAMESPACE(std)cout << "< lexer " << rname.c_str() << "; c==" << LA(1) << ANTLR_USE_NAMESPACE(std)endl;
+ traceDepth--;
+}
+
+void CharScanner::uponEOF()
+{
+}
+
+#ifndef NO_STATIC_CONSTS
+const int CharScanner::NO_CHAR;
+const int CharScanner::EOF_CHAR;
+#endif
+
+ANTLR_END_NAMESPACE
+