summary refs log tree commit diff stats
path: root/kjs/lexer.cpp
diff options
context:
space:
mode:
author toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> 2009-11-25 17:56:58 +0000
committer toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> 2009-11-25 17:56:58 +0000
commit ce4a32fe52ef09d8f5ff1dd22c001110902b60a2 (patch)
tree 5ac38a06f3dde268dc7927dc155896926aaf7012 /kjs/lexer.cpp
download tdelibs-ce4a32fe52ef09d8f5ff1dd22c001110902b60a2.tar.gz
tdelibs-ce4a32fe52ef09d8f5ff1dd22c001110902b60a2.zip
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features.
BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'kjs/lexer.cpp')
-rw-r--r-- kjs/lexer.cpp 930
1 files changed, 930 insertions, 0 deletions
diff --git a/kjs/lexer.cpp b/kjs/lexer.cpp
new file mode 100644
index 000000000..054defb88
--- /dev/null
+++ b/kjs/lexer.cpp
@@ -0,0 +1,930 @@
+// -*- c-basic-offset: 2 -*-
+/*
+ * This file is part of the KDE libraries
+ * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "value.h"
+#include "object.h"
+#include "types.h"
+#include "interpreter.h"
+#include "nodes.h"
+#include "lexer.h"
+#include "identifier.h"
+#include "lookup.h"
+#include "internal.h"
+#include "dtoa.h"
+
+// we can't specify the namespace in yacc's C output, so do it here
+using namespace KJS;
+
+static Lexer *currLexer = 0; // lexer handed out by Lexer::curr(); set by the Lexer constructor, created lazily by curr()
+
+#ifndef KDE_USE_FINAL
+#include "grammar.h"
+#endif
+
+#include "lexer.lut.h"
+
+extern YYLTYPE yylloc; // global bison variable holding token info
+
+// a bridge for yacc from the C world to C++
+int kjsyylex()
+{
+  // hand the parser's request for the next token to the active lexer
+  Lexer *const activeLexer = Lexer::curr();
+  return activeLexer->lex();
+}
+
+// Creates a lexer with no input attached, allocates the two recording
+// buffers and registers the new object as the current lexer.
+// Call setCode() to attach source text before calling lex().
+Lexer::Lexer()
+  : yylineno(1),
+    size8(128), size16(128), restrKeyword(false),
+    convertNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),
+    code(0), length(0),
+#ifndef KJS_PURE_ECMA
+    bol(true),
+#endif
+    current(0), next1(0), next2(0), next3(0),
+    strings(0), numStrings(0), stringsCapacity(0),
+    identifiers(0), numIdentifiers(0), identifiersCapacity(0)
+{
+  // These flags were previously assigned only by setCode() (and lex()),
+  // yet lex() reads 'delimited' at end of input. Give them the same
+  // values setCode() uses so a freshly built lexer never reads
+  // indeterminate members.
+  delimited = false;
+  foundBad = false;
+  skipLF = false;
+  skipCR = false;
+
+  // allocate space for read buffers
+  buffer8 = new char[size8];
+  buffer16 = new UChar[size16];
+
+  // the most recently constructed lexer becomes the one returned by curr()
+  currLexer = this;
+}
+
+// Releases everything the lexer owns and unregisters it.
+Lexer::~Lexer()
+{
+  // Free identifiers/strings created by makeIdentifier()/makeUString()
+  // that were not yet released; doneParsing() resets its own bookkeeping,
+  // so this is harmless when it has already been called.
+  doneParsing();
+
+  delete [] buffer8;
+  delete [] buffer16;
+
+  // never leave curr() pointing at a destroyed object
+  if (currLexer == this)
+    currLexer = 0;
+}
+
+// Returns the current lexer, creating it on first use.
+Lexer *Lexer::curr()
+{
+  if (currLexer)
+    return currLexer;
+
+  // no instance yet: create the singleton
+  currLexer = new Lexer();
+  return currLexer;
+}
+
+#ifdef KJS_DEBUG_MEM
+// Destroys the current lexer (if any) and forgets it.
+void Lexer::globalClear()
+{
+  Lexer *const doomed = currLexer;
+  currLexer = 0L;
+  delete doomed;
+}
+#endif
+
+// Attaches a new piece of source text (len UTF-16 units starting at c)
+// and resets all scanning state to the beginning of that text.
+void Lexer::setCode(const UChar *c, unsigned int len)
+{
+  // the input itself
+  code = c;
+  length = len;
+  pos = 0;
+
+  // forget whatever a previous run left behind
+  yylineno = 1;
+  stackToken = -1;
+  lastToken = -1;
+  restrKeyword = false;
+  delimited = false;
+  convertNextIdentifier = false;
+  foundBad = false;
+  skipLF = false;
+  skipCR = false;
+#ifndef KJS_PURE_ECMA
+  bol = true;
+#endif
+
+  // prime the four-character lookahead window; -1 marks "past the end"
+  current = -1;
+  next1 = -1;
+  next2 = -1;
+  next3 = -1;
+  if (length > 0)
+    current = code[0].uc;
+  if (length > 1)
+    next1 = code[1].uc;
+  if (length > 2)
+    next2 = code[2].uc;
+  if (length > 3)
+    next3 = code[3].uc;
+}
+
+// Advances the lookahead window by p characters.
+void Lexer::shift(unsigned int p)
+{
+  for (unsigned int step = 0; step < p; ++step) {
+    ++pos;
+    current = next1;
+    next1 = next2;
+    next2 = next3;
+
+    // refill the far end of the window, or mark it as past the end
+    if (pos + 3 < length)
+      next3 = code[pos + 3].uc;
+    else
+      next3 = -1;
+  }
+}
+
+// called on each new line
+void Lexer::nextLine()
+{
+#ifndef KJS_PURE_ECMA
+  // we are at the beginning of a line again
+  bol = true;
+#endif
+  ++yylineno;
+}
+
+// Ends the scanning loop in lex() and records the final state s.
+void Lexer::setDone(State s)
+{
+  done = true;
+  state = s;
+}
+
+int Lexer::lex() // scan one token; returns its code, 0 at end of input, -1 on a lexical error
+{
+ int token = 0;
+ state = Start;
+ unsigned short stringType = 0; // quote character (' or ") that opened the current string literal
+ pos8 = pos16 = 0; // both recording buffers start out empty for every token
+ done = false;
+ terminator = false; // set once a line terminator has been skipped in front of the token
+ skipLF = false;
+ skipCR = false;
+
+ // did we push a token on the stack previously ?
+ // (after an automatic semicolon insertion)
+ if (stackToken >= 0) {
+ setDone(Other);
+ token = stackToken;
+ stackToken = 0; // NOTE(review): 0 still passes the ">= 0" test above, so later calls keep returning token 0 until setCode() resets this to -1
+ }
+
+ while (!done) {
+ if (skipLF && current != '\n') // found \r but not \n afterwards
+ skipLF = false;
+ if (skipCR && current != '\r') // found \n but not \r afterwards
+ skipCR = false;
+ if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
+ {
+ skipLF = false;
+ skipCR = false;
+ shift(1);
+ }
+
+ bool cr = (current == '\r');
+ bool lf = (current == '\n');
+ if (cr)
+ skipLF = true;
+ else if (lf)
+ skipCR = true;
+ bool isLineTerminator = cr || lf;
+
+ switch (state) {
+ case Start:
+ if (isWhiteSpace(current)) {
+ // do nothing; the shift at the bottom of the loop skips the character
+ } else if (current == '/' && next1 == '/') {
+ shift(1); // skip the first '/'; the shift at the bottom of the loop skips the second
+ state = InSingleLineComment;
+ } else if (current == '/' && next1 == '*') {
+ shift(1);
+ state = InMultiLineComment;
+ } else if (current == -1) { // end of input
+ if (!terminator && !delimited) {
+ // automatic semicolon insertion if program incomplete
+ token = ';';
+ stackToken = 0; // queue token 0 (the value the Eof case yields) for the next call
+ setDone(Other);
+ } else
+ setDone(Eof);
+ } else if (isLineTerminator) {
+ nextLine();
+ terminator = true;
+ if (restrKeyword) { // line break directly after continue/break/return/throw: insert ';'
+ token = ';';
+ setDone(Other);
+ }
+ } else if (current == '"' || current == '\'') {
+ state = InString;
+ stringType = current;
+ } else if (isIdentLetter(current)) {
+ record16(current);
+ state = InIdentifierOrKeyword;
+ } else if (current == '\\') {
+ state = InIdentifierUnicodeEscapeStart;
+ } else if (current == '0') {
+ record8(current); // number text is collected in the 8-bit buffer and converted after the loop
+ state = InNum0;
+ } else if (isDecimalDigit(current)) {
+ record8(current);
+ state = InNum;
+ } else if (current == '.' && isDecimalDigit(next1)) {
+ record8(current);
+ state = InDecimal;
+#ifndef KJS_PURE_ECMA
+ // <!-- marks the beginning of a line comment (for www usage)
+ } else if (current == '<' && next1 == '!' &&
+ next2 == '-' && next3 == '-') {
+ shift(3);
+ state = InSingleLineComment;
+ // same for -->
+ } else if (bol && current == '-' && next1 == '-' && next2 == '>') { // only while still at the beginning of a line
+ shift(2);
+ state = InSingleLineComment;
+#endif
+ } else {
+ token = matchPunctuator(current, next1, next2, next3); // consumes the punctuator itself on success
+ if (token != -1) {
+ setDone(Other);
+ } else {
+ // cerr << "encountered unknown character" << endl;
+ setDone(Bad);
+ }
+ }
+ break;
+ case InString:
+ if (current == stringType) {
+ shift(1); // step over the closing quote
+ setDone(String);
+ } else if (current == -1 || isLineTerminator) { // unterminated string literal
+ setDone(Bad);
+ } else if (current == '\\') {
+ state = InEscapeSequence;
+ } else {
+ record16(current);
+ }
+ break;
+ // Escape Sequences inside of strings
+ case InEscapeSequence:
+ if (isOctalDigit(current)) {
+ if (current >= '0' && current <= '3' && // three digits only when the first one is 0-3
+ isOctalDigit(next1) && isOctalDigit(next2)) {
+ record16(convertOctal(current, next1, next2));
+ shift(2); // the third digit is skipped by the shift at the bottom of the loop
+ state = InString;
+ } else if (isOctalDigit(current) && isOctalDigit(next1)) {
+ record16(convertOctal('0', current, next1));
+ shift(1);
+ state = InString;
+ } else if (isOctalDigit(current)) {
+ record16(convertOctal('0', '0', current));
+ state = InString;
+ } else { // not reachable: the previous branch repeats the enclosing test
+ setDone(Bad);
+ }
+ } else if (current == 'x')
+ state = InHexEscape;
+ else if (current == 'u')
+ state = InUnicodeEscape;
+ else {
+ if (isLineTerminator) // backslash followed by a line break: count the line; the break character itself is recorded below
+ nextLine();
+ record16(singleEscape(current));
+ state = InString;
+ }
+ break;
+ case InHexEscape:
+ if (isHexDigit(current) && isHexDigit(next1)) {
+ state = InString;
+ record16(convertHex(current, next1));
+ shift(1);
+ } else if (current == stringType) { // "\x" right before the closing quote: keep a literal 'x'
+ record16('x');
+ shift(1);
+ setDone(String);
+ } else { // not two hex digits: keep 'x' and the following character literally
+ record16('x');
+ record16(current);
+ state = InString;
+ }
+ break;
+ case InUnicodeEscape:
+ if (isHexDigit(current) && isHexDigit(next1) &&
+ isHexDigit(next2) && isHexDigit(next3)) {
+ record16(convertUnicode(current, next1, next2, next3));
+ shift(3);
+ state = InString;
+ } else if (current == stringType) { // "\u" right before the closing quote: keep a literal 'u'
+ record16('u');
+ shift(1);
+ setDone(String);
+ } else { // any other malformed \u escape is an error
+ setDone(Bad);
+ }
+ break;
+ case InSingleLineComment:
+ if (isLineTerminator) {
+ nextLine();
+ terminator = true;
+ if (restrKeyword) {
+ token = ';';
+ setDone(Other);
+ } else
+ state = Start;
+ } else if (current == -1) { // the input ends inside the comment
+ setDone(Eof);
+ }
+ break;
+ case InMultiLineComment:
+ if (current == -1) { // unterminated comment
+ setDone(Bad);
+ } else if (isLineTerminator) {
+ nextLine();
+ } else if (current == '*' && next1 == '/') {
+ state = Start;
+ shift(1); // skip '*'; the shift at the bottom of the loop skips '/'
+ }
+ break;
+ case InIdentifierOrKeyword:
+ case InIdentifier:
+ if (isIdentLetter(current) || isDecimalDigit(current))
+ record16(current);
+ else if (current == '\\')
+ state = InIdentifierUnicodeEscapeStart;
+ else
+ setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
+ break;
+ case InNum0:
+ if (current == 'x' || current == 'X') {
+ record8(current);
+ state = InHex;
+ } else if (current == '.') {
+ record8(current);
+ state = InDecimal;
+ } else if (current == 'e' || current == 'E') {
+ record8(current);
+ state = InExponentIndicator;
+ } else if (isOctalDigit(current)) {
+ record8(current);
+ state = InOctal;
+ } else if (isDecimalDigit(current)) { // 8 or 9 after the leading 0: continue as a decimal literal
+ record8(current);
+ state = InDecimal;
+ } else {
+ setDone(Number);
+ }
+ break;
+ case InHex:
+ if (isHexDigit(current)) {
+ record8(current);
+ } else {
+ setDone(Hex);
+ }
+ break;
+ case InOctal:
+ if (isOctalDigit(current)) {
+ record8(current);
+ }
+ else if (isDecimalDigit(current)) { // 8 or 9: this is not an octal literal after all
+ record8(current);
+ state = InDecimal;
+ } else
+ setDone(Octal);
+ break;
+ case InNum:
+ if (isDecimalDigit(current)) {
+ record8(current);
+ } else if (current == '.') {
+ record8(current);
+ state = InDecimal;
+ } else if (current == 'e' || current == 'E') {
+ record8(current);
+ state = InExponentIndicator;
+ } else
+ setDone(Number);
+ break;
+ case InDecimal:
+ if (isDecimalDigit(current)) {
+ record8(current);
+ } else if (current == 'e' || current == 'E') {
+ record8(current);
+ state = InExponentIndicator;
+ } else
+ setDone(Number);
+ break;
+ case InExponentIndicator:
+ if (current == '+' || current == '-') { // NOTE(review): the state is not advanced here, so repeated signs are accepted by this scanner
+ record8(current);
+ } else if (isDecimalDigit(current)) {
+ record8(current);
+ state = InExponent;
+ } else // exponent indicator without any digits
+ setDone(Bad);
+ break;
+ case InExponent:
+ if (isDecimalDigit(current)) {
+ record8(current);
+ } else
+ setDone(Number);
+ break;
+ case InIdentifierUnicodeEscapeStart:
+ if (current == 'u')
+ state = InIdentifierUnicodeEscape;
+ else
+ setDone(Bad);
+ break;
+ case InIdentifierUnicodeEscape:
+ if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
+ record16(convertUnicode(current, next1, next2, next3));
+ shift(3);
+ state = InIdentifier; // an identifier containing an escape is never looked up as a keyword
+ } else {
+ setDone(Bad);
+ }
+ break;
+ default:
+ assert(!"Unhandled state in switch statement");
+ }
+
+ // move on to the next character
+ if (!done)
+ shift(1);
+#ifndef KJS_PURE_ECMA
+ if (state != Start && state != InSingleLineComment)
+ bol = false; // left the Start/InSingleLineComment states: no longer at the beginning of a line
+#endif
+ }
+
+ // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
+ if ((state == Number || state == Octal || state == Hex)
+ && isIdentLetter(current))
+ state = Bad;
+
+ // terminate string
+ buffer8[pos8] = '\0'; // record8() always keeps one slot free for this terminator
+
+#ifdef KJS_DEBUG_LEX
+ fprintf(stderr, "line: %d ", lineNo());
+ fprintf(stderr, "yytext (%x): ", buffer8[0]);
+ fprintf(stderr, "%s ", buffer8);
+#endif
+
+ long double dval = 0; // numeric value of the literal, computed below from the text in buffer8
+ if (state == Number) {
+ dval = kjs_strtod(buffer8, 0L);
+ } else if (state == Hex) { // scan hex numbers
+ dval = 0;
+ if (buffer8[0] == '0' && (buffer8[1] == 'x' || buffer8[1] == 'X')) {
+ for (const char *p = buffer8+2; *p; p++) {
+ if (!isHexDigit(*p)) {
+ dval = 0;
+ break;
+ }
+ dval = dval * 16 + convertHex(*p);
+ }
+ }
+ state = Number; // from here on handled like any other number
+ } else if (state == Octal) { // scan octal number
+ dval = 0;
+ if (buffer8[0] == '0') {
+ for (const char *p = buffer8+1; *p; p++) {
+ if (*p < '0' || *p > '7') {
+ dval = 0;
+ break;
+ }
+ dval = dval * 8 + *p - '0';
+ }
+ }
+ state = Number; // from here on handled like any other number
+ }
+
+#ifdef KJS_DEBUG_LEX
+ switch (state) {
+ case Eof:
+ printf("(EOF)\n");
+ break;
+ case Other:
+ printf("(Other)\n");
+ break;
+ case Identifier:
+ case IdentifierOrKeyword:
+ printf("(Identifier)/(Keyword)\n");
+ break;
+ case String:
+ printf("(String)\n");
+ break;
+ case Number:
+ printf("(Number)\n");
+ break;
+ default:
+ printf("(unknown)");
+ }
+#endif
+
+ if (state != Identifier && state != IdentifierOrKeyword &&
+ convertNextIdentifier)
+ convertNextIdentifier = false; // the pending conversion only applies to an identifier that follows immediately
+
+ restrKeyword = false;
+ delimited = false;
+ kjsyylloc.first_line = yylineno; // ??? (both ends of the location are set to the current line)
+ kjsyylloc.last_line = yylineno;
+
+ switch (state) {
+ case Eof:
+ token = 0;
+ break;
+ case Other:
+ if(token == '}' || token == ';') {
+ delimited = true; // no automatic semicolon is inserted at end of input right after '}' or ';'
+ }
+ break;
+ case IdentifierOrKeyword:
+ if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) { // negative result: not a keyword
+ case Identifier: // label sits inside the if-body on purpose: the Identifier state enters here directly, skipping the keyword lookup
+ // Lookup for keyword failed, means this is an identifier
+ // Apply anonymous-function hack below (convert the identifier)
+ if (convertNextIdentifier) {
+ convertNextIdentifier = false;
+#ifdef KJS_VERBOSE
+ UString debugstr(buffer16, pos16); fprintf(stderr,"Anonymous function hack: eating identifier %s\n",debugstr.ascii());
+#endif
+ token = FUNCEXPRIDENT;
+ } else {
+ token = IDENT;
+ }
+ /* TODO: close leak on parse error. same holds true for String */
+ kjsyylval.ident = makeIdentifier(buffer16, pos16);
+ break;
+ }
+
+ convertNextIdentifier = false; // reached only when the lookup found a keyword
+ // Hack for "f = function somename() { ... }", too hard to get into the grammar
+ // Same for building an array with function pointers ( 'name', func1, 'name2', func2 )
+ // There are lots of other uses, we really have to get this into the grammar
+ if ( token == FUNCTION &&
+ ( lastToken == '=' || lastToken == ',' || lastToken == '(' ||
+ lastToken == ':' || lastToken == RETURN ) )
+ convertNextIdentifier = true;
+
+ if (token == CONTINUE || token == BREAK ||
+ token == RETURN || token == THROW)
+ restrKeyword = true; // a line break right after these keywords makes the next lex() call return ';'
+ break;
+ case String:
+ kjsyylval.ustr = makeUString(buffer16, pos16);
+ token = STRING;
+ break;
+ case Number:
+ kjsyylval.dval = dval;
+ token = NUMBER;
+ break;
+ case Bad:
+ foundBad = true;
+ return -1; // lastToken is left untouched on errors
+ default:
+ assert(!"unhandled numeration value in switch");
+ return -1;
+ }
+ lastToken = token;
+ return token;
+}
+
+// True for the characters the lexer skips as white space
+// (line terminators are handled separately in lex()).
+bool Lexer::isWhiteSpace(unsigned short c)
+{
+  switch (c) {
+  case ' ':
+  case '\t':
+  case 0x0b: // vertical tab
+  case 0x0c: // form feed
+  case 0xa0: // no-break space
+    return true;
+  default:
+    return false;
+  }
+}
+
+// True if c may appear in an identifier (digits are checked separately).
+bool Lexer::isIdentLetter(unsigned short c)
+{
+  // Approximation of the Unicode categories
+  // Uppercase letter (Lu), Lowercase letter (Ll),
+  // Titlecase letter (Lt), Modifier letter (Lm),
+  // Other letter (Lo), or Letter number (Nl).
+  // Also see: http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
+  /* TODO: use complete category table */
+  static const unsigned short letterRanges[][2] = {
+    { 'a', 'z' },
+    { 'A', 'Z' },
+    { 0x00c0, 0x00d6 }, // A with grave - O with diaeresis
+    { 0x00d8, 0x00f6 }, // O with stroke - o with diaeresis
+    { 0x00f8, 0x02af }, // o with stroke - turned h with fishhook and tail
+    { 0x0388, 0x1ffc }  // Greek etc. TODO: not precise
+  };
+  const unsigned int rangeCount = sizeof(letterRanges) / sizeof(letterRanges[0]);
+
+  for (unsigned int i = 0; i < rangeCount; ++i) {
+    if (c >= letterRanges[i][0] && c <= letterRanges[i][1])
+      return true;
+  }
+  return c == '$' || c == '_';
+}
+
+// True for '0' .. '9'.
+bool Lexer::isDecimalDigit(unsigned short c)
+{
+  if (c < '0')
+    return false;
+  return c <= '9';
+}
+
+// True for '0'-'9', 'a'-'f' and 'A'-'F'.
+bool Lexer::isHexDigit(unsigned short c)
+{
+  if (c >= '0' && c <= '9')
+    return true;
+  if (c >= 'a' && c <= 'f')
+    return true;
+  return (c >= 'A' && c <= 'F');
+}
+
+// True for '0' .. '7'.
+bool Lexer::isOctalDigit(unsigned short c)
+{
+  return !(c < '0' || c > '7');
+}
+
+// Tries to recognise a punctuator starting at c1 (c2..c4 are the
+// characters that follow). The longest possible punctuator wins.
+// On success its characters are consumed with shift() and its token
+// code is returned; otherwise nothing is consumed and -1 is returned.
+int Lexer::matchPunctuator(unsigned short c1, unsigned short c2,
+                           unsigned short c3, unsigned short c4)
+{
+  // the only four-character punctuator: >>>=
+  if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
+    shift(4);
+    return URSHIFTEQUAL;
+  }
+
+  // three-character punctuators ending in '='
+  if (c3 == '=') {
+    if (c1 == '=' && c2 == '=') {
+      shift(3);
+      return STREQ;
+    }
+    if (c1 == '!' && c2 == '=') {
+      shift(3);
+      return STRNEQ;
+    }
+    if (c1 == '<' && c2 == '<') {
+      shift(3);
+      return LSHIFTEQUAL;
+    }
+    if (c1 == '>' && c2 == '>') {
+      shift(3);
+      return RSHIFTEQUAL;
+    }
+  }
+
+  // >>>
+  if (c1 == '>' && c2 == '>' && c3 == '>') {
+    shift(3);
+    return URSHIFT;
+  }
+
+  if (c2 == '=') {
+    // two-character punctuators of the form "<op>="
+    switch (c1) {
+    case '<':
+      shift(2);
+      return LE;
+    case '>':
+      shift(2);
+      return GE;
+    case '!':
+      shift(2);
+      return NE;
+    case '=':
+      shift(2);
+      return EQEQ;
+    case '+':
+      shift(2);
+      return PLUSEQUAL;
+    case '-':
+      shift(2);
+      return MINUSEQUAL;
+    case '*':
+      shift(2);
+      return MULTEQUAL;
+    case '/':
+      shift(2);
+      return DIVEQUAL;
+    case '&':
+      shift(2);
+      return ANDEQUAL;
+    case '^':
+      shift(2);
+      return XOREQUAL;
+    case '%':
+      shift(2);
+      return MODEQUAL;
+    case '|':
+      shift(2);
+      return OREQUAL;
+    default:
+      break;
+    }
+  } else if (c2 == c1) {
+    // two-character punctuators made of a doubled character
+    switch (c1) {
+    case '+':
+      shift(2);
+      // a line terminator was seen before the operator
+      return terminator ? AUTOPLUSPLUS : PLUSPLUS;
+    case '-':
+      shift(2);
+      return terminator ? AUTOMINUSMINUS : MINUSMINUS;
+    case '<':
+      shift(2);
+      return LSHIFT;
+    case '>':
+      shift(2);
+      return RSHIFT;
+    case '&':
+      shift(2);
+      return AND;
+    case '|':
+      shift(2);
+      return OR;
+    default:
+      break;
+    }
+  }
+
+  // single-character punctuators: the token code is the character itself
+  static const char singles[] = "=><,!~?:.+-*/&|^%(){}[];";
+  for (const char *p = singles; *p; ++p) {
+    if (c1 == static_cast<unsigned char>(*p)) {
+      shift(1);
+      return static_cast<int>(c1);
+    }
+  }
+
+  return -1;
+}
+
+// Maps the character following a backslash to the character it stands
+// for; characters without a special meaning are returned unchanged.
+unsigned short Lexer::singleEscape(unsigned short c) const
+{
+  // { escape character, resulting character code }
+  static const unsigned short escapes[][2] = {
+    { 'b',  0x08 },
+    { 't',  0x09 },
+    { 'n',  0x0A },
+    { 'v',  0x0B },
+    { 'f',  0x0C },
+    { 'r',  0x0D },
+    { '"',  0x22 },
+    { '\'', 0x27 },
+    { '\\', 0x5C }
+  };
+  const unsigned int escapeCount = sizeof(escapes) / sizeof(escapes[0]);
+
+  for (unsigned int i = 0; i < escapeCount; ++i) {
+    if (escapes[i][0] == c)
+      return escapes[i][1];
+  }
+  return c;
+}
+
+// Value of the three octal digit characters c1 c2 c3 (c1 most significant).
+unsigned short Lexer::convertOctal(unsigned short c1, unsigned short c2,
+                                   unsigned short c3) const
+{
+  const int high = c1 - '0';
+  const int middle = c2 - '0';
+  const int low = c3 - '0';
+  return ((high * 8 + middle) * 8 + low);
+}
+
+// Value (0-15) of a single hex digit character. Anything that is neither
+// a decimal digit nor 'a'-'f' is treated as an upper-case hex letter.
+unsigned char Lexer::convertHex(unsigned short c)
+{
+  // amount to subtract from the character code to obtain the digit value
+  int offset;
+  if (c >= '0' && c <= '9')
+    offset = '0';
+  else if (c >= 'a' && c <= 'f')
+    offset = 'a' - 10;
+  else
+    offset = 'A' - 10;
+
+  return (c - offset);
+}
+
+// Value of the two hex digit characters c1 c2 (c1 is the high nibble).
+unsigned char Lexer::convertHex(unsigned short c1, unsigned short c2)
+{
+  const int highNibble = convertHex(c1);
+  const int lowNibble = convertHex(c2);
+  return ((highNibble << 4) + lowNibble);
+}
+
+// Builds a UChar from four hex digit characters: c1 c2 form the first
+// constructor argument, c3 c4 the second.
+UChar Lexer::convertUnicode(unsigned short c1, unsigned short c2,
+                            unsigned short c3, unsigned short c4)
+{
+  const int firstPair = (convertHex(c1) << 4) + convertHex(c2);
+  const int secondPair = (convertHex(c3) << 4) + convertHex(c4);
+  return UChar(firstPair, secondPair);
+}
+
+// Appends c to the 8-bit buffer, doubling the buffer when needed.
+void Lexer::record8(unsigned short c)
+{
+  assert(c <= 0xff);
+
+  // grow while one slot is still free, so there is always room left
+  // for one more character after the recorded ones
+  if (pos8 >= size8 - 1) {
+    char *grown = new char[2 * size8];
+    memcpy(grown, buffer8, size8 * sizeof(char));
+    delete [] buffer8;
+    buffer8 = grown;
+    size8 *= 2;
+  }
+
+  buffer8[pos8] = static_cast<char>(c);
+  ++pos8;
+}
+
+// Appends the (non-negative) character code c to the 16-bit buffer.
+void Lexer::record16(int c)
+{
+  assert(c >= 0);
+  //assert(c <= USHRT_MAX);
+  const unsigned short unit = static_cast<unsigned short>(c);
+  record16(UChar(unit));
+}
+
+// Appends c to the 16-bit buffer, doubling the buffer when needed.
+void Lexer::record16(UChar c)
+{
+  // grow as soon as only one free slot is left
+  if (pos16 >= size16 - 1) {
+    UChar *grown = new UChar[2 * size16];
+    memcpy(grown, buffer16, size16 * sizeof(UChar));
+    delete [] buffer16;
+    buffer16 = grown;
+    size16 *= 2;
+  }
+
+  buffer16[pos16] = c;
+  ++pos16;
+}
+
+// Scans a regular expression literal starting at the current character
+// (presumably just after the opening '/' - confirm against the caller).
+// Stores the text up to the closing '/' in 'pattern' and the identifier
+// letters that follow it in 'flags'. Returns false if a line terminator
+// or the end of the input is reached before the closing '/'.
+bool Lexer::scanRegExp()
+{
+  pos16 = 0;
+  bool lastWasEscape = false;
+  bool inBrackets = false;
+
+  for (;;) {
+    if (current == '\r' || current == '\n' || current == -1)
+      return false;
+
+    // an unescaped '/' outside of a [...] class ends the pattern
+    const bool closingSlash =
+      (current == '/') && !lastWasEscape && !inBrackets;
+    if (closingSlash) {
+      pattern = UString(buffer16, pos16);
+      pos16 = 0;
+      shift(1);
+      break;
+    }
+
+    // keep track of '[' and ']'
+    if (!lastWasEscape) {
+      if (current == '[')
+        inBrackets = true;
+      else if (current == ']')
+        inBrackets = false;
+    }
+
+    record16(current);
+    // a backslash escapes the next character unless it is escaped itself
+    lastWasEscape = (current == '\\') && !lastWasEscape;
+    shift(1);
+  }
+
+  // everything that looks like an identifier letter is a flag
+  while (isIdentLetter(current)) {
+    record16(current);
+    shift(1);
+  }
+  flags = UString(buffer16, pos16);
+
+  return true;
+}
+
+
+// Destroys every Identifier and UString created by makeIdentifier() and
+// makeUString() and releases the arrays that kept track of them.
+void Lexer::doneParsing()
+{
+  // identifiers
+  for (unsigned idx = 0; idx != numIdentifiers; ++idx)
+    delete identifiers[idx];
+  free(identifiers);
+  identifiers = 0;
+  identifiersCapacity = 0;
+  numIdentifiers = 0;
+
+  // strings
+  for (unsigned idx = 0; idx != numStrings; ++idx)
+    delete strings[idx];
+  free(strings);
+  strings = 0;
+  stringsCapacity = 0;
+  numStrings = 0;
+}
+
+const int initialCapacity = 64; // number of slots allocated the first time makeIdentifier()/makeUString() needs its array
+const int growthFactor = 2; // the capacity is multiplied by this whenever the array is full
+
+// Creates an Identifier from the first pos characters of buffer and
+// remembers it so that doneParsing() can delete it later.
+Identifier *Lexer::makeIdentifier(UChar *buffer, unsigned int pos)
+{
+  // make room in the bookkeeping array when it is full
+  if (numIdentifiers == identifiersCapacity) {
+    if (identifiersCapacity == 0)
+      identifiersCapacity = initialCapacity;
+    else
+      identifiersCapacity *= growthFactor;
+    identifiers = static_cast<KJS::Identifier **>(
+      realloc(identifiers, sizeof(KJS::Identifier *) * identifiersCapacity));
+  }
+
+  KJS::Identifier *const created = new KJS::Identifier(buffer, pos);
+  identifiers[numIdentifiers] = created;
+  ++numIdentifiers;
+  return created;
+}
+
+// Creates a UString from the first pos characters of buffer and
+// remembers it so that doneParsing() can delete it later.
+UString *Lexer::makeUString(UChar *buffer, unsigned int pos)
+{
+  // make room in the bookkeeping array when it is full
+  if (numStrings == stringsCapacity) {
+    if (stringsCapacity == 0)
+      stringsCapacity = initialCapacity;
+    else
+      stringsCapacity *= growthFactor;
+    strings = static_cast<UString **>(
+      realloc(strings, sizeof(UString *) * stringsCapacity));
+  }
+
+  UString *const created = new UString(buffer, pos);
+  strings[numStrings] = created;
+  ++numStrings;
+  return created;
+}