From 3b0c3b8206964b85bf3716c962d26dd15c4f285f Mon Sep 17 00:00:00 2001 From: Michele Calgaro Date: Mon, 18 Nov 2024 22:24:33 +0900 Subject: Remove uncrustify-trinity. It is no longer necessary to maintain a customize trinity version. Upstream version can be used as is. Signed-off-by: Michele Calgaro --- .../tests/expected/d/40006-Lexer.d | 2499 -------------------- 1 file changed, 2499 deletions(-) delete mode 100644 debian/uncrustify-trinity/uncrustify-trinity-0.78.1/tests/expected/d/40006-Lexer.d (limited to 'debian/uncrustify-trinity/uncrustify-trinity-0.78.1/tests/expected/d/40006-Lexer.d') diff --git a/debian/uncrustify-trinity/uncrustify-trinity-0.78.1/tests/expected/d/40006-Lexer.d b/debian/uncrustify-trinity/uncrustify-trinity-0.78.1/tests/expected/d/40006-Lexer.d deleted file mode 100644 index 9b2bf8e5..00000000 --- a/debian/uncrustify-trinity/uncrustify-trinity-0.78.1/tests/expected/d/40006-Lexer.d +++ /dev/null @@ -1,2499 +0,0 @@ -/+ - Copyright (c) 1999-2006 by Digital Mars - All Rights Reserved - written by Walter Bright www.digitalmars.com - License for redistribution is by either the Artistic License in artistic.txt, or the GNU General Public License in gnu.txt. - See the included readme.txt for details. 
- D Language conversion by: J Duncan - +/ - -/** - d language lexer - */ - -module dparser.Lexer; - -import dparser.Root; - -import dparser.Tokens; -import dparser.Token; -import dparser.Keyword; - -import dparser.Types; - -import dparser.Module; -import dparser.Identifier; -import dparser.unialpha; - -import dparser.OutBuffer; - -//private import std.ctype; -//private import std.string; -//import dwf.core.debugapi; - -int errno = 0; - -//#if _WIN32 && __DMC__ -// from \dm\src\include\setlocal.h -//extern "C" char * __cdecl __locale_decpoint; -char *__locale_decpoint; - -//#endif -//const uint LS = 0x2028; // UTF line separator -//const uint PS = 0x2029; // UTF paragraph separator - -//extern int isUniAlpha(unsigned u); -//extern int HtmlNamedEntity(unsigned char *p, int length); - -/** - Lexer object - */ - -class Lexer -{ - static Identifier[char[]] stringtable; - static OutBuffer stringbuffer; - static Token *freelist; - - Token token; // current token - Module mod; // current module - Loc loc; // for error messages - ubyte *base; // pointer to start of buffer - ubyte *end; // past end of buffer - ubyte *p; // current character - int doDocComment; // collect doc comment information - int anyToken; // !=0 means seen at least one token - int commentToken; // !=0 means comments are TOKcomment's - - - this(Module mod, ubyte *base, uint begoffset, uint endoffset, int doDocComment, int commentToken) - { - if (stringbuffer is null) - stringbuffer = new OutBuffer; - loc = Loc(mod, 1); - - this.base = base; - this.end = base + endoffset; - this.p = base + begoffset; - this.mod = mod; - this.doDocComment = doDocComment; - this.commentToken = commentToken; - - /* - If first line starts with '#!', ignore the line - */ - - if (p[0] == '#' && p[1] == '!') - { - p += 2; - while (true) - { - ubyte c = *p; - - switch (c) - { - case '\n': - p++; - break; - - case '\r': - p++; - if (*p == '\n') - p++; - break; - - case 0: - case 0x1A: - break; - - default: - if (c & 0x80) - { - 
uint u = decodeUTF(); - - if (u == PS || u == LS) - break; - } - p++; - continue; - } - break; - } - - loc.linnum = 2; - } - } - - - - // generate a unique identifier for this string - static Identifier idPool(in char[] str) - { -// StringValue sv; -// uint len = s.length; -// StringValue sv = stringtable.update(s, len); -// Identifier* id = cast(Identifier*) sv.ptrvalue; -// if( id is null ) - if ((str in stringtable) == null) - { - stringtable[str] = new Identifier(str, TOK.TOKidentifier); - } - return stringtable[str]; - } - - static void initKeywords() - { - // build character map - cmtable_init(); - - // create keyword tokens & identifiers - dparser.Keyword.initKeywords(); - - // create standard lexer tokens - dparser.Token.createLexerTokens(); - } - - // Combine two document comments into one. - static char[] combineComments(char[] c1, char[] c2) - { - char[] c = c2; - - if (c1.length) - { - c = c1; - if (c2.length) - { - c = c1 ~ "\n" ~ c2; - } - } - return c; - } - - // Decode UTF character. Issue error messages for invalid sequences. Return decoded character, advance p to last character in UTF sequence. - //! fix - uint decodeUTF() - { - ubyte *s = p; - ubyte c = *s; - - assert(c & 0x80); - if (!(c & 0x80)) - return c; - - return cast(uint)'X'; - /* - dchar u; - uint len; - - - - // Check length of remaining string up to 6 UTF-8 characters - for( len = 1; len < 6 && s[len]; len++ ) - { - - } - /+ - uint idx = 0; - char* msg = utf_decodeChar( s, len, &idx, &u ); - p += idx - 1; - if( msg ) - { - error(msg); - } - +/ - return u; - */ - } - - void error(...) - { - if ((mod !is null) && !global.gag) - { - writefln(formatLoc(loc, _arguments, _argptr)); - /* - char[] p = loc.toChars(); - if( p.length ) - writef( "%s: ", p ); - writefx( stdout, _arguments, _argptr, 1 ); - */ - if (global.errors >= global.max_errors) // moderate blizzard of cascading messages - throw new Exception("too many errors"); - } - - global.errors++; - } - - void errorLoc(Loc loc, ...) 
- { - if ((mod !is null) && !global.gag) - { - writefln(formatLoc(loc, _arguments, _argptr)); - /* - char[] p = loc.toChars(); - if( p.length ) - writef("%s: ", p); - writefx(stdout, _arguments, _argptr, 1); - */ - if (global.errors >= 20) // moderate blizzard of cascading messages - throw new Exception("too many errors"); - } - - global.errors++; - } - - - TOK nextToken() - { - if (token.next) - { - Token *t = token.next; - - memcpy(&token, t, Token.sizeof); -// t.next = freelist; -// freelist = t; - } - else - { - scan(&token); - } -// token.print(); - return token.value; - } - - Token *peek(inout Token ct) - { - Token *t; - - if (ct.next) - t = ct.next; - else - { - t = new Token; - scan(t); - t.next = null; - ct.next = t; - } - return t; - } - - // Turn next token in buffer into a token. - - void scan(Token *t) - { -// debug writefln("scan token"); - uint lastLine = loc.linnum; - uint linnum; - - t.blockComment = null; - t.lineComment = null; - while (true) - { - t.ptr = p; -// debug writefln( " p = %d, *p = ", cast(uint)p, cast(char)*p ); - switch (*p) - { - case 0: - case 0x1a: - t.value = TOK.TOKeof; // end of file -// debug writefln( " EOF" ); - return; - - case ' ': - case '\t': - case '\v': - case '\f': - p++; -// debug writefln( " whitespace" ); - continue; // skip white space - - case '\r': -// debug writefln( " cr" ); - p++; - if (*p != '\n') // if CR stands by itself - loc.linnum++; - continue; // skip white space - - case '\n': -// debug writefln( " nl" ); - p++; - loc.linnum++; - continue; // skip white space - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - t.value = number(t); - return; - -/* - #if CSTRINGS - case '\'': - t.value = charConstant(t, 0); - return; - - case '"': - t.value = stringConstant(t,0); - return; - - case 'l': - case 'L': - if( p[1] == '\'') - { - p++; - t.value = charConstant(t, 1); - return; - } - else if( p[1] == '"') - { - p++; - t.value = 
stringConstant(t, 1); - return; - } - #else - */ - case '\'': -// debug writefln( " char" ); - t.value = charConstant(t, 0); - return; - - case 'r': -// debug writefln( " wysiwyg" ); - if (p[1] != '"') - goto case_identifier; - p++; - - case '`': - t.value = wysiwygStringConstant(t, *p); - return; - - case 'x': -// debug writefln( " hex string" ); - if (p[1] != '"') - goto case_identifier; - p++; - t.value = hexStringConstant(t); - return; - - - case '"': -// debug writefln( " string" ); - t.value = escapeStringConstant(t, 0); -// debug writefln( t.ustring ); - return; - - case '\\': // escaped string literal -// debug writefln( " escaped string literal" ); - uint c; - - stringbuffer.offset = 0; - do - { - p++; - c = escapeSequence(); - stringbuffer.write(c); - } while (*p == '\\'); -// t.len = stringbuffer.offset; -// stringbuffer.write(cast(byte)0); - t.ustring = stringbuffer.toString; -// memcpy( t.ustring.ptr, stringbuffer.data, stringbuffer.offset ); - t.postfix = 0; - t.value = TOK.TOKstring; - return; - - case 'l': - case 'L': -// #endif - - case 'a': - case 'b': - case 'c': - case 'd': - case 'e': - case 'f': - case 'g': - case 'h': - case 'i': - case 'j': - case 'k': - case 'm': - case 'n': - case 'o': - case 'p': - case 'q': /*case 'r':*/ - case 's': - case 't': - case 'u': - case 'v': - case 'w': /*case 'x':*/ - case 'y': - case 'z': - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - case 'G': - case 'H': - case 'I': - case 'J': - case 'K': - case 'M': - case 'N': - case 'O': - case 'P': - case 'Q': - case 'R': - case 'S': - case 'T': - case 'U': - case 'V': - case 'W': - case 'X': - case 'Y': - case 'Z': - case '_': - case_identifier: - { -// debug writefln( " identifier" ); - ubyte c; - - do - { - c = *++p; - } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF()))); - -// sv = stringtable.update((char *)t.ptr, p - t.ptr); - char[] tmp; - - tmp.length = p - t.ptr; - memcpy(tmp.ptr, t.ptr, p - t.ptr); - Identifier id; - 
Identifier *pid = tmp in stringtable; - - if (pid) - { - id = *pid; - } - - if (id is null) - { - id = new Identifier(tmp, TOK.TOKidentifier); - stringtable[tmp] = id; - } - - t.identifier = id; - t.value = cast(TOK)id.value; - anyToken = 1; - - // if special identifier token - if (*t.ptr == '_') - { - static char date[11 + 1]; - static char time[8 + 1]; - static char timestamp[24 + 1]; - - if (!date[0]) // lazy evaluation - { - //!! - /+ - time_t t; - char *p; - .time(&t); - p = ctime(&t); - assert(p); - sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20); - sprintf(time.ptr, "%.8s", p + 11); - sprintf(timestamp.ptr, "%.24s", p); - +/ - } - - if (mod && id is Id.FILE) - { - t.value = TOK.TOKstring; - if (loc.filename.length) - t.ustring = loc.filename; - else - t.ustring = mod.identifier.toChars(); - goto Llen; - } - else if (mod && id == Id.LINE) - { - t.value = TOK.TOKint64v; - t.uns64value = loc.linnum; - } - else if (id == Id.DATE) - { - t.value = TOK.TOKstring; - //! t.ustring = date; - goto Llen; - } - else if (id == Id.TIME) - { - t.value = TOK.TOKstring; - //! t.ustring = time; - goto Llen; - } - else if (id == Id.TIMESTAMP) - { - t.value = TOK.TOKstring; - //! 
t.ustring = timestamp; - Llen: - t.postfix = 0; -// t.len = strlen((char *)t.ustring); - } - } - //printf("t.value = %d\n",t.value); - return; - } - - // comments - case '/': - p++; - switch (*p) - { - case '=': - p++; - t.value = TOK.TOKdivass; - return; - - case '*': // '/*' - p++; - linnum = loc.linnum; - while (true) - { - while (true) - { - ubyte c = *p; - - switch (c) - { - case '/': - break; - - case '\n': - loc.linnum++; - p++; - continue; - - case '\r': - p++; - if (*p != '\n') - loc.linnum++; - continue; - - case 0: - case 0x1A: - error("unterminated /* */ comment"); - p = end; - t.value = TOK.TOKeof; - return; - - default: - if (c & 0x80) - { - uint u = decodeUTF(); - - if (u == PS || u == LS) - loc.linnum++; - } - p++; - continue; - } - break; - } - p++; - if (p[-2] == '*' && p - 3 != t.ptr) - break; - } - - if (commentToken) - { - t.value = TOK.TOKcomment; - return; - } - // if /** but not /**/ - else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr) - getDocComment(t, lastLine == linnum); //! ? 
- continue; - - case '/': // do // style comments - linnum = loc.linnum; - while (1) - { - ubyte c = *++p; - - switch (c) - { - case '\n': - break; - - case '\r': - if (p[1] == '\n') - p++; - break; - - case 0: - case 0x1a: - if (commentToken) - { - p = end; - t.value = TOK.TOKcomment; - return; - } - if (doDocComment && t.ptr[2] == '/') - getDocComment(t, lastLine == linnum); - p = end; - t.value = TOK.TOKeof; - return; - - default: - if (c & 0x80) - { - uint u = decodeUTF(); - - if (u == PS || u == LS) - break; - } - continue; - } - break; - } - - if (commentToken) - { - p++; - loc.linnum++; - t.value = TOK.TOKcomment; - return; - } - if (doDocComment && t.ptr[2] == '/') - getDocComment(t, lastLine == linnum); - - p++; - loc.linnum++; - continue; - - case '+': - { - int nest; - - linnum = loc.linnum; - p++; - nest = 1; - while (1) - { - ubyte c = *p; - - switch (c) - { - case '/': - p++; - if (*p == '+') - { - p++; - nest++; - } - continue; - - case '+': - p++; - if (*p == '/') - { - p++; - if (--nest == 0) - break; - } - continue; - - case '\r': - p++; - if (*p != '\n') - loc.linnum++; - continue; - - case '\n': - loc.linnum++; - p++; - continue; - - case 0: - case 0x1A: - error("unterminated /+ +/ comment"); - p = end; - t.value = TOK.TOKeof; - return; - - default: - if (c & 0x80) - { - uint u = decodeUTF(); - - if (u == PS || u == LS) - loc.linnum++; - } - p++; - continue; - } - break; - } - if (commentToken) - { - t.value = TOK.TOKcomment; - return; - } - if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr) - { - // if /++ but not /++/ - getDocComment(t, lastLine == linnum); - } - continue; - } - - default: - break; - } - t.value = TOK.TOKdiv; - return; - - case '.': - p++; - if (isdigit(*p)) - { - p--; - t.value = inreal(t); - } - else if (p[0] == '.') - { - if (p[1] == '.') - { - p += 2; - t.value = TOK.TOKdotdotdot; - } - else - { - p++; - t.value = TOK.TOKslice; - } - } - else - t.value = TOK.TOKdot; - return; - - case '&': - p++; - if (*p == '=') - { - 
p++; - t.value = TOK.TOKandass; - } - else if (*p == '&') - { - p++; - t.value = TOK.TOKandand; - } - else - t.value = TOK.TOKand; - return; - - // |, ||, |= - case '|': - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKorass; - } - else if (*p == '|') - { - p++; - t.value = TOK.TOKoror; - } - else - t.value = TOK.TOKor; - return; - - case '-': - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKminass; - } - else if (*p == '-') - { - p++; - t.value = TOK.TOKminusminus; - } - else - t.value = TOK.TOKmin; - return; - - // +, +=, ++ - case '+': - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKaddass; // += - } - else if (*p == '+') - { - p++; - t.value = TOK.TOKplusplus; // ++ - } - else - t.value = TOK.TOKadd; // + - return; - - // <, <=, <<=, <<, <>=, <> - case '<': - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKle; // <= - } - else if (*p == '<') - { - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKshlass; // <<= - } - else - t.value = TOK.TOKshl; // << - } - else if (*p == '>') - { - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKleg; // <>= - } - else - t.value = TOK.TOKlg; // <> - } - else - t.value = TOK.TOKlt; // < - return; - - // >, >>, >>>, >=, >>=, >>>= - case '>': - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKge; // >= - } - else if (*p == '>') - { - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKshrass; // >>= - } - else if (*p == '>') - { - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKushrass; // >>>= - } - else - t.value = TOK.TOKushr; // >>> - } - else - t.value = TOK.TOKshr; // >> - } - else - t.value = TOK.TOKgt; // > - return; - - case '!': - p++; - if (*p == '=') - { - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKnotidentity; // !== - } - else - t.value = TOK.TOKnotequal; // != - } - else if (*p == '<') - { - p++; - if (*p == '>') - { - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKunord; // !<>= - } - else - t.value = TOK.TOKue; // !<> - } - else if (*p == '=') - { - p++; - t.value = 
TOK.TOKug; // !<= - } - else - t.value = TOK.TOKuge; // !< - } - else if (*p == '>') - { - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKul; // !>= - } - else - t.value = TOK.TOKule; // !> - } - else - t.value = TOK.TOKnot; // ! - return; - - case '=': - p++; - if (*p == '=') - { - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKidentity; // === - } - else - t.value = TOK.TOKequal; // == - } - else - t.value = TOK.TOKassign; // = - return; - - case '~': - p++; - if (*p == '=') - { - p++; - t.value = TOK.TOKcatass; // ~= - } - else - t.value = TOK.TOKtilde; // ~ - return; - - // SINGLE - case '(': p++; t.value = TOK.TOKlparen; return; - - case ')': p++; t.value = TOK.TOKrparen; return; - - case '[': p++; t.value = TOK.TOKlbracket; return; - - case ']': p++; t.value = TOK.TOKrbracket; return; - - case '{': p++; t.value = TOK.TOKlcurly; return; - - case '}': p++; t.value = TOK.TOKrcurly; return; - - case '?': p++; t.value = TOK.TOKquestion; return; - - case ',': p++; t.value = TOK.TOKcomma; return; - - case ';': p++; t.value = TOK.TOKsemicolon; return; - - case ':': p++; t.value = TOK.TOKcolon; return; - - case '$': p++; t.value = TOK.TOKdollar; return; - - // DOUBLE - case '*': p++; if (*p == '=') - { - p++; t.value = TOK.TOKmulass; - } - else - t.value = TOK.TOKmul; - return; - - case '%': p++; if (*p == '=') - { - p++; t.value = TOK.TOKmodass; - } - else - t.value = TOK.TOKmod; - return; - - case '^': p++; if (*p == '=') - { - p++; t.value = TOK.TOKxorass; - } - else - t.value = TOK.TOKxor; - return; - -// removed 148 case '~': p++; if( *p == '=' ) { p++; t.value = TOK.TOKcatass; } else t.value = TOK.TOKtilde; return; - - - case '#': - p++; - Pragma(); - continue; - - default: - { - debug writefln(" default char"); - ubyte c = *p; - - if (c & 0x80) - { - uint u = decodeUTF(); - - // Check for start of unicode identifier - if (isUniAlpha(u)) - goto case_identifier; - - if (u == PS || u == LS) - { - loc.linnum++; - p++; - continue; - } - } - if (isprint(c)) 
- error("unsupported char '%s'", cast(char)c); - else - error("unsupported char 0x%02x", cast(ubyte)c); - p++; - continue; - } - } - } - } - - - - // Parse escape sequence. - uint escapeSequence() - { - uint c; - int n; - int ndigits; - - c = *p; - switch (c) - { - case '\'': - case '"': - case '?': - case '\\': - Lconsume: - p++; - break; - - case 'a': c = 7; goto Lconsume; - - case 'b': c = 8; goto Lconsume; - - case 'f': c = 12; goto Lconsume; - - case 'n': c = 10; goto Lconsume; - - case 'r': c = 13; goto Lconsume; - - case 't': c = 9; goto Lconsume; - - case 'v': c = 11; goto Lconsume; - - case 'u': - ndigits = 4; - goto Lhex; - - case 'U': - ndigits = 8; - goto Lhex; - - case 'x': - ndigits = 2; - Lhex: - p++; - c = *p; - if (ishex(c)) - { - uint v; - - n = 0; - v = 0; - while (1) - { - if (isdigit(c)) - c -= '0'; - else if (islower(c)) - c -= 'a' - 10; - else - c -= 'A' - 10; - v = v * 16 + c; - c = *++p; - if (++n == ndigits) - break; - if (!ishex(c)) - { - error("escape hex sequence has %d hex digits instead of %d", n, ndigits); - break; - } - } -//! if( ndigits != 2 && !utf_isValidDchar(v)) -//! error("invalid UTF character \\U%08x", v); - c = v; - } - else - error("undefined escape hex sequence \\%s\n", c); - break; - - case '&': // named character entity - for (ubyte *idstart = ++p; 1; p++) - { - switch (*p) - { - case ';': - //!!! 
- /+ - c = HtmlNamedEntity(idstart, p - idstart); - if( c == ~0 ) - { - error("unnamed character entity &%.*s;", p - idstart, idstart); - c = ' '; - } - - p++; - +/ - break; - - default: - if (isalpha(*p) || (p != idstart + 1 && isdigit(*p))) - continue; - error("unterminated named entity"); - break; - } - break; - } - break; - - case 0: - case 0x1a: // end of file - c = '\\'; - break; - - default: - if (isoctal(c)) - { - ubyte v; - - n = 0; - do - { - v = v * 8 + (c - '0'); - c = *++p; - } while (++n < 3 && isoctal(c)); - c = v; - } - else - error("undefined escape sequence \\%s\n", c); - break; - } - return c; - } - - /************************************** - */ - - TOK wysiwygStringConstant(Token *t, int tc) - { - uint c; - Loc start = loc; - - p++; - stringbuffer.offset = 0; - while (1) - { - c = *p++; - switch (c) - { - case '\n': - loc.linnum++; - break; - - case '\r': - if (*p == '\n') - continue; // ignore - c = '\n'; // treat EndOfLine as \n character - loc.linnum++; - break; - - case 0: - case 0x1a: - error("unterminated string constant starting at %s", start.toChars()); - t.ustring = ""; - t.postfix = 0; - return TOK.TOKstring; - - case '"': - case '`': - if (c == tc) - { -// t.len = stringbuffer.offset; - stringbuffer.write(cast(byte)0); - t.ustring = stringbuffer.toString; -// t.ustring = (ubyte *)mem.malloc(stringbuffer.offset); -// memcpy(t.ustring, stringbuffer.data, stringbuffer.offset); - stringPostfix(t); - return TOK.TOKstring; - } - break; - - default: - if (c & 0x80) - { - p--; - uint u = decodeUTF(); - - p++; - if (u == PS || u == LS) - loc.linnum++; - stringbuffer.write(u); - continue; - } - break; - } - stringbuffer.write(c); - } - } - - /************************************** - * Lex hex strings: - * x"0A ae 34FE BD" - */ - - TOK hexStringConstant(Token *t) - { - uint c; - Loc start = loc; - uint n = 0; - uint v; - - p++; - stringbuffer.offset = 0; - while (1) - { - c = *p++; - switch (c) - { - case ' ': - case '\t': - case '\v': - case 
'\f': - continue; // skip white space - - case '\r': - if (*p == '\n') - continue; // ignore - - // Treat isolated '\r' as if it were a '\n' - case '\n': - loc.linnum++; - continue; - - case 0: - case 0x1a: - error("unterminated string constant starting at %s", start.toChars()); - t.ustring = ""; - t.postfix = 0; - return TOK.TOKstring; - - case '"': - if (n & 1) - { - error("odd number (%d) of hex characters in hex string", n); - stringbuffer.write(v); - } -// t.len = stringbuffer.offset; -// stringbuffer.write(cast(byte)0); - t.ustring = stringbuffer.toString; -// t.ustring = (ubyte *)mem.malloc(stringbuffer.offset); -// memcpy(t.ustring, stringbuffer.data, stringbuffer.offset); - stringPostfix(t); - return TOK.TOKstring; - - default: - if (c >= '0' && c <= '9') - c -= '0'; - else if (c >= 'a' && c <= 'f') - c -= 'a' - 10; - else if (c >= 'A' && c <= 'F') - c -= 'A' - 10; - else if (c & 0x80) - { - p--; - uint u = decodeUTF(); - - p++; - if (u == PS || u == LS) - loc.linnum++; - else - error("non-hex character \\u%x", u); - } - else - error("non-hex character '%s'", c); - if (n & 1) - { - v = (v << 4) | c; - stringbuffer.write(v); - } - else - v = c; - n++; - break; - } - } - } - - /************************************** - */ - - TOK escapeStringConstant(Token *t, int wide) - { - uint c; - Loc start = loc; - - p++; - stringbuffer.offset = 0; - // debug writefln( "escape string constant: %s", std.string.toString( cast(char*)p ) ); - while (1) - { - c = *p++; - switch (c) - { - case '\\': - switch (*p) - { - case 'u': - case 'U': - case '&': - c = escapeSequence(); - stringbuffer.write(c); - continue; - - default: - c = escapeSequence(); - break; - } - break; - - case '\n': - loc.linnum++; - break; - - case '\r': - if (*p == '\n') - continue; // ignore - c = '\n'; // treat EndOfLine as \n character - loc.linnum++; - break; - - case '"': -// writefln( "end of string: ", stringbuffer.toString ); - t.ustring = stringbuffer.toString().dup; - // t.len = 
stringbuffer.offset; - // stringbuffer.write(cast(byte)0); - // t.ustring = (ubyte *)mem.malloc(stringbuffer.offset); - // memcpy(t.ustring, stringbuffer.data, stringbuffer.offset); - stringPostfix(t); - - return TOK.TOKstring; - - case 0: - case 0x1a: - p--; - error("unterminated string constant starting at %s", start.toChars()); - t.ustring = ""; -// t.len = 0; - t.postfix = 0; - return TOK.TOKstring; - - default: - if (c & 0x80) - { - p--; - c = decodeUTF(); - if (c == LS || c == PS) - { - c = '\n'; - loc.linnum++; - } - p++; - stringbuffer.write(cast(char)c); - continue; - } - break; - } - stringbuffer.write(cast(char)c); -// writefln( stringbuffer.toString ); - } - } - - //************************************** - TOK charConstant(Token *t, int wide) - { - uint c; - TOK tk = TOK.TOKcharv; - - //printf("Lexer.charConstant\n"); - p++; - c = *p++; - switch (c) - { - case '\\': - switch (*p) - { - case 'u': - t.uns64value = escapeSequence(); - tk = TOK.TOKwcharv; - break; - - case 'U': - case '&': - t.uns64value = escapeSequence(); - tk = TOK.TOKdcharv; - break; - - default: - t.uns64value = escapeSequence(); - break; - } - break; - - case '\n': - L1: - loc.linnum++; - - case '\r': - case 0: - case 0x1a: - case '\'': - error("unterminated character constant"); - return tk; - - default: - if (c & 0x80) - { - p--; - c = decodeUTF(); - p++; - if (c == LS || c == PS) - goto L1; - if (c < 0xd800 || (c >= 0xe000 && c < 0xfffe)) - tk = TOK.TOKwcharv; - else - tk = TOK.TOKdcharv; - } - t.uns64value = c; - break; - } - - if (*p != '\'') - { - error("unterminated character constant"); - return tk; - } - p++; - return tk; - } - - // Get postfix of string literal. 
- void stringPostfix(Token *t) - { - switch (*p) - { - case 'c': - case 'w': - case 'd': - t.postfix = *p; - p++; - break; - - default: - t.postfix = 0; - break; - } - } - - /*************************************** - * Read \u or \U unicode sequence - * Input: - * u 'u' or 'U' - */ - /* - uint Wchar(uint u) - { - uint value; - uint n; - ubyte c; - uint nchars; - - nchars = (u == 'U') ? 8 : 4; - value = 0; - for (n = 0; 1; n++) - { - ++p; - if( n == nchars) - break; - c = *p; - if( !ishex(c)) - { - error("\\%s sequence must be followed by %d hex characters", u, nchars); - break; - } - if( isdigit(c)) - c -= '0'; - else if( islower(c)) - c -= 'a' - 10; - else - c -= 'A' - 10; - value <<= 4; - value |= c; - } - return value; - } - */ - - /************************************** - * Read in a number. - * If it's an integer, store it in tok.TKutok.Vlong. - * integers can be decimal, octal or hex - * Handle the suffixes U, UL, LU, L, etc. - * If it's double, store it in tok.TKutok.Vdouble. - * Returns: - * TKnum - * TKdouble,... 
- */ - - TOK number(Token *t) - { - //debug writefln("Lexer.number()"); - // We use a state machine to collect numbers - enum STATE - { - STATE_initial, - STATE_0, - STATE_decimal, - STATE_octal, - STATE_octale, - STATE_hex, - STATE_binary, - STATE_hex0, - STATE_binary0, - STATE_hexh, - STATE_error - } - - enum FLAGS - { - FLAGS_decimal = 1, // decimal - FLAGS_unsigned = 2, // u or U suffix - FLAGS_long = 4, // l or L suffix - } - FLAGS flags = FLAGS.FLAGS_decimal; - - int i; - TOK result; - int base; - - stringbuffer.offset = 0; -// stringbuffer.data = null; - STATE state = STATE.STATE_initial; - ubyte *start = p; - - TOK _isreal() - { - p = start; - return inreal(t); - } - - while (true) - { - char c = cast(char)*p; - - switch (state) - { - case STATE.STATE_initial: // opening state - if (c == '0') - state = STATE.STATE_0; - else - state = STATE.STATE_decimal; - break; - - case STATE.STATE_0: - flags = cast(FLAGS)(flags & ~FLAGS.FLAGS_decimal); - switch (c) - { - // #if ZEROH -// case 'H': // 0h -// case 'h': -// goto hexh; - // #endif - case 'X': - case 'x': - state = STATE.STATE_hex0; - break; - - case '.': - if (p[1] == '.') // .. 
is a separate token - goto done; - - case 'i': - case 'f': - case 'F': - goto _Real; - - // #if ZEROH -// case 'E': -// case 'e': -// goto case_hex; - // #endif - case 'B': - case 'b': - state = STATE.STATE_binary0; - break; - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - state = STATE.STATE_octal; - break; - - // #if ZEROH -// case '8': case '9': case 'A': -// case 'C': case 'D': case 'F': -// case 'a': case 'c': case 'd': case 'f': -// case_hex: -// state = STATE.STATE_hexh; -// break; - // #endif - case '_': - state = STATE.STATE_octal; - p++; - continue; - - default: - goto done; - } - break; - - case STATE.STATE_decimal: // reading decimal number - - // if its not a digit - decimal complete or not a decimal - if (!isdigit(c)) - { -// debug writefln( "\tnon-digit( %s )", c ); - // #if ZEROH -// if( ishex(c) || c == 'H' || c == 'h' ) -// goto hexh; - // #endif - //! wtf ? - // ignore embedded _ - if (c == '_') - { - p++; - continue; - } - - // check decimal point - make real - if (c == '.' && p[1] != '.') - goto _Real; - - // check for mantra - make real - if (c == 'i' || c == 'f' || c == 'F' || c == 'e' || c == 'E') - { - _Real: // It's a real number. Back up and rescan as a real - p = start; - return inreal(t); - } - - goto done; - } - break; - - case STATE.STATE_hex0: // reading hex number - case STATE.STATE_hex: - if (!ishex(c)) - { - if (c == '_') // ignore embedded _ - { - p++; - continue; - } - if (c == '.' 
&& p[1] != '.') - goto _Real; - if (c == 'P' || c == 'p' || c == 'i') - goto _Real; - if (state == STATE.STATE_hex0) - error("Hex digit expected, not '%s'", c); - goto done; - } - state = STATE.STATE_hex; - break; - - // #if ZEROH -// hexh: -// state = STATE.STATE_hexh; -// -// case STATE.STATE_hexh: // parse numbers like 0FFh -// if( !ishex(c)) -// { -// if( c == 'H' || c == 'h') -// { -// p++; -// base = 16; -// goto done; -// } -// else -// { -// // Check for something like 1E3 or 0E24 -// if( memchr(stringbuffer.data.ptr, 'E', stringbuffer.offset) || memchr( stringbuffer.data.ptr, 'e', stringbuffer.offset)) -// goto _Real; -// error("Hex digit expected, not '%s'", c); -// goto done; -// } -// } -// break; - // #endif - - case STATE.STATE_octal: // reading octal number - case STATE.STATE_octale: // reading octal number with non-octal digits - if (!isoctal(c)) - { -// #if ZEROH -// if( ishex(c) || c == 'H' || c == 'h' ) -// goto hexh; -// #endif - if (c == '_') // ignore embedded _ - { - p++; - continue; - } - if (c == '.' 
&& p[1] != '.') - goto _Real; - if (c == 'i') - goto _Real; - if (isdigit(c)) - state = STATE.STATE_octale; - else - goto done; - } - break; - - case STATE.STATE_binary0: // starting binary number - case STATE.STATE_binary: // reading binary number - if (c != '0' && c != '1') - { - // #if ZEROH -// if( ishex(c) || c == 'H' || c == 'h' ) -// goto hexh; - // #endif - if (c == '_') // ignore embedded _ - { - p++; - continue; - } - if (state == STATE.STATE_binary0) - { - error("binary digit expected"); - state = STATE.STATE_error; - break; - } - else - goto done; - } - state = STATE.STATE_binary; - break; - - case STATE.STATE_error: // for error recovery - if (!isdigit(c)) // scan until non-digit - goto done; - break; - - default: - assert(0); - } - stringbuffer.write(cast(char)c); - p++; - } - done: - stringbuffer.write(cast(char)0); // terminate string - -// debug writefln( "\tdigit complete( %s )", stringbuffer.toString ); - - if (state == STATE.STATE_octale) - error("Octal digit expected"); - - uinteger_t n; // unsigned >=64 bit integer type - - if (stringbuffer.offset == 2 && (state == STATE.STATE_decimal || state == STATE.STATE_0)) - n = stringbuffer.data[0] - '0'; - else - { - // Convert string to integer - char *p = cast(char *)stringbuffer.data.ptr; - int r = 10; - int d; - - if (*p == '0') - { - if (p[1] == 'x' || p[1] == 'X') - { - // "0x#" - p += 2; - r = 16; - } - else if (p[1] == 'b' || p[1] == 'B') - { - // "0b#" - binary - p += 2; - r = 2; - } - else if (isdigit(p[1])) - { - p += 1; - r = 8; - } - } - - n = 0; - - while (true) - { - if (*p >= '0' && *p <= '9') - d = *p - '0'; - else if (*p >= 'a' && *p <= 'z') - d = *p - 'a' + 10; - else if (*p >= 'A' && *p <= 'Z') - d = *p - 'A' + 10; - else - break; - - if (d >= r) - break; - - if (n * r + d < n) - { - error("integer overflow"); - break; - } - - n = n * r + d; - p++; - } - - // if n needs more than 64 bits - if (n.sizeof > 8 && n > 0xffffffffffffffffL) - error("integer overflow"); - } - - // Parse 
trailing 'u', 'U', 'l' or 'L' in any combination - while (true) - { - ubyte f; - - switch (*p) - { - case 'U': - case 'u': - f = FLAGS.FLAGS_unsigned; - goto L1; - - case 'L': - case 'l': - f = FLAGS.FLAGS_long; - L1: - p++; - if (flags & f) - error("unrecognized token"); - flags = cast(FLAGS)(flags | f); - continue; - - default: - break; - } - break; - } - - switch (flags) - { - case 0: - /* Octal or Hexadecimal constant. - * First that fits: int, uint, long, ulong - */ - if (n & 0x8000000000000000L) - result = TOK.TOKuns64v; - else if (n & 0xffffffff00000000L) - result = TOK.TOKint64v; - else if (n & 0x80000000) - result = TOK.TOKuns32v; - else - result = TOK.TOKint32v; - break; - - case FLAGS.FLAGS_decimal: - /* First that fits: int, long, long long - */ - if (n & 0x8000000000000000L) - { - error("signed integer overflow"); - result = TOK.TOKuns64v; - } - else if (n & 0xffffffff80000000L) - result = TOK.TOKint64v; - else - result = TOK.TOKint32v; - break; - - case FLAGS.FLAGS_unsigned: - case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned: - /* First that fits: uint, ulong - */ - if (n & 0xffffffff00000000L) - result = TOK.TOKuns64v; - else - result = TOK.TOKuns32v; - break; - - case FLAGS.FLAGS_decimal | FLAGS.FLAGS_long: - if (n & 0x8000000000000000L) - { - error("signed integer overflow"); - result = TOK.TOKuns64v; - } - else - result = TOK.TOKint64v; - break; - - case FLAGS.FLAGS_long: - if (n & 0x8000000000000000L) - result = TOK.TOKuns64v; - else - result = TOK.TOKint64v; - break; - - case FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long: - case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long: - result = TOK.TOKuns64v; - break; - - default: - debug writefln("%x", flags); - assert(0); - } - t.uns64value = n; - return result; - } - - /************************************** - * Read in characters, converting them to real. - * Bugs: - * Exponent overflow not detected. - * Too much requested precision is not detected. 
*/

    /**
     * Scans a floating-point literal starting at p and stores it in t.
     *
     * A small state machine (dblstate) copies the characters of the literal
     * into stringbuffer, skipping '_' characters that appear inside a run of
     * digits.  The NUL-terminated text is then converted with strtold.  An
     * 'F'/'f' suffix selects the 32-bit token kind, 'L'/'l' the 80-bit kind,
     * anything else the 64-bit kind; a following 'i'/'I' turns the result
     * into the matching imaginary kind.
     *
     * Params:
     *  t = token whose float80value receives the converted number
     *
     * Returns:
     *  TOKfloat32v, TOKfloat64v or TOKfloat80v, or the corresponding
     *  TOKimaginary32v / TOKimaginary64v / TOKimaginary80v
     */
    TOK inreal(Token *t)
    {
        int  dblstate;
        uint c;
        char hex;               // is this a hexadecimal-floating-constant?
        TOK  result;

        stringbuffer.offset = 0;
        dblstate            = 0;
        hex                 = 0;
    Lnext:
        while (1)
        {
            // Get next char from input
            c = *p++;
            while (1)
            {
                switch (dblstate)
                {
                case 0:                 // opening state
                    if (c == '0')
                        dblstate = 9;
                    else if (c == '.')
                        dblstate = 3;
                    else
                        dblstate = 1;
                    break;

                case 9:                 // character right after a leading '0'
                    dblstate = 1;
                    if (c == 'X' || c == 'x')
                    {
                        hex++;
                        break;
                    }
                    // not a hex prefix: deliberately falls through to case 1

                case 1:                 // digits to left of .
                case 3:                 // digits to right of .
                case 7:                 // continuing exponent digits
                    if (!isdigit(c) && !(hex && isxdigit(c)))
                    {
                        if (c == '_')
                            goto Lnext;         // ignore embedded '_'
                        // end of this digit run: re-examine the same
                        // character in the following state
                        dblstate++;
                        continue;
                    }
                    break;

                case 2:                 // no more digits to left of .
                    if (c == '.')
                    {
                        dblstate++;
                        break;
                    }
                    // no '.', deliberately falls through to the exponent check

                case 4:                 // no more digits to right of .
                    if ((c == 'E' || c == 'e') || hex && (c == 'P' || c == 'p'))
                    {
                        dblstate = 5;
                        hex      = 0;           // exponent is always decimal
                        break;
                    }
                    if (hex)
                        error("binary-exponent-part required");
                    goto done;

                case 5:                 // looking immediately to right of E
                    dblstate++;
                    if (c == '-' || c == '+')
                        break;
                    // no sign, deliberately falls through to case 6

                case 6:                 // 1st exponent digit expected
                    if (!isdigit(c))
                        error("exponent expected");
                    dblstate++;
                    break;

                case 8:                 // past end of exponent digits
                    goto done;
                }
                break;
            }
            // c was read from a ubyte, so it always fits in one char.  The
            // explicit cast matches what the integer scanner in this class
            // does and guarantees that exactly one byte is appended no matter
            // how OutBuffer overloads write() for uint.
            stringbuffer.write(cast(char)c);
        }
    done:
        p--;                            // un-read the character that ended the literal

        stringbuffer.write(cast(byte)0);        // terminate string for strtold & co.

// #if _WIN32 && __DMC__
        // Force '.' as the decimal point while converting; restored below.
        char *save = __locale_decpoint;

        __locale_decpoint = ".";
// #endif
        t.float80value = strtold(cast(char *)stringbuffer.data.ptr, null);
        // NOTE(review): this file declares its own module-level `int errno`;
        // if that is what `errno` resolves to here, the conversions below
        // never set it and the ERANGE check at the end cannot fire - confirm.
        errno = 0;
        switch (*p)
        {
        case 'F':
        case 'f':
            // converted again at float precision; the value itself is discarded
            strtof(cast(char *)stringbuffer.data.ptr, null);
            result = TOK.TOKfloat32v;
            p++;
            break;

        default:
            // converted again at double precision; the value itself is discarded
            strtod(cast(char *)stringbuffer.data.ptr, null);
            result = TOK.TOKfloat64v;
            break;

        case 'L':
        case 'l':
            result = TOK.TOKfloat80v;
            p++;
            break;
        }
        if (*p == 'i' || *p == 'I')
        {
            // imaginary suffix: map each real kind to its imaginary twin
            p++;
            switch (result)
            {
            case TOK.TOKfloat32v:
                result = TOK.TOKimaginary32v;
                break;

            case TOK.TOKfloat64v:
                result = TOK.TOKimaginary64v;
                break;

            case TOK.TOKfloat80v:
                result = TOK.TOKimaginary80v;
                break;
            }
        }
// #if _WIN32 && __DMC__
        __locale_decpoint = save;
// #endif
        if (errno == ERANGE)
            error("number is not representable");
        return result;
    }


    /*********************************************
     * Do pragma.
     * Currently, the only pragma supported is:
     *	#line linnum [filespec]
     *
     * Scans the identifier `line` and an integer token, then walks the rest
     * of the physical line: white space is skipped, `__FILE__` is consumed,
     * and a double-quoted string becomes the new file name.  On reaching the
     * end of the line this.loc.linnum is set to linnum - 1 and, if a file
     * name was given, this.loc.filename is replaced.  Anything else reports
     * an error at the location where the pragma started.
     */
    void Pragma()
    {
        Token  tok;
        int    linnum;
        char[] filespec;
        Loc    loc = this.loc;          // location of the pragma itself, for errorLoc

        scan(&tok);

        if (tok.value != TOK.TOKidentifier || tok.identifier != Id.line)
            goto Lerr;

        scan(&tok);
        if (tok.value == TOK.TOKint32v || tok.value == TOK.TOKint64v)
            linnum = tok.uns64value - 1;
        else
            goto Lerr;

        while (1)
        {
            switch (*p)
            {
            case 0:
            case 0x1a:
            case '\n':
            Lnewline:
                // end of the #line directive: commit the new location
                this.loc.linnum = linnum;
                if (filespec.length)
                    this.loc.filename = filespec;
                return;

            case '\r':
                p++;
                if (*p != '\n')
                {
                    // lone '\r' is itself the line end
                    p--;
                    goto Lnewline;
                }
                continue;               // "\r\n": let the '\n' case finish up

            case ' ':
            case '\t':
            case '\v':
            case '\f':
                p++;
                continue;               // skip white space

            case '_':
                if (mod && memcmp(p, cast(char *)"__FILE__", 8) == 0)
                {
                    p += 8;
                    // NOTE(review): the assignment below is disabled, so
                    // __FILE__ is accepted but leaves filespec unchanged.
//!                 filespec = mem.strdup(loc.filename ? loc.filename : mod.identifier.toChars());
                    continue;
                }
                // Any other use of '_' is malformed.  The previous code did
                // `continue` here without advancing p, which re-read the same
                // '_' forever.
                goto Lerr;

            case '"':
                if (filespec)
                    goto Lerr;          // only one file name allowed
                stringbuffer.offset = 0;
                p++;
                while (1)
                {
                    uint c;

                    c = *p;
                    switch (c)
                    {
                    case '\n':
                    case '\r':
                    case 0:
                    case 0x1a:
                        goto Lerr;      // unterminated string

                    case '"':
                        stringbuffer.write(cast(byte)0);
                        // filespec = mem.strdup((char *)stringbuffer.data);
                        filespec = stringbuffer.toString.dup;
                        p++;
                        break;

                    default:
                        if (c & 0x80)
                        {
                            uint u = decodeUTF();

                            if (u == PS || u == LS)
                                goto Lerr;
                        }
                        // one byte per character, same as the other scanners
                        stringbuffer.write(cast(char)c);
                        p++;
                        continue;
                    }
                    break;
                }
                continue;

            default:
                if (*p & 0x80)
                {
                    uint u = decodeUTF();

                    // Unicode line/paragraph separators also end the directive
                    if (u == PS || u == LS)
                        goto Lnewline;
                }
                goto Lerr;
            }
        }

    Lerr:
        errorLoc(loc, "#line integer [\"filespec\"]\\n expected");
    }



    /***************************************************
     * Parse doc comment embedded between t.ptr and p.
     * Remove trailing blanks and tabs from lines.
     * Replace all newlines with \n.
     * Remove leading comment character from each line.
     * Decide if it's a lineComment or a blockComment.
     * Append to previous one for this token.
*/

    /**
     * Params:
     *  t           = token whose text starts at t.ptr; its lineComment or
     *                blockComment field receives the cleaned-up text
     *  lineComment = non-zero to store into t.lineComment (only honoured
     *                once anyToken is set), otherwise t.blockComment is used
     */
    void getDocComment(Token *t, uint lineComment)
    {
        auto OutBuffer text = new OutBuffer;
        ubyte marker        = t.ptr[2];         // '*', '+' or '/' of the doc comment
        ubyte *cur          = t.ptr + 3;        // start of comment text
        ubyte *stop         = p;
        int atLineStart     = 0;

        // Block comments end in a two-character terminator that is not text.
        if (marker == '*' || marker == '+')
            stop -= 2;

        // Skip the opening row of ****'s, ++++'s or ////'s
        while (cur < stop && *cur == marker)
            cur++;

        // Drop the closing row of ****'s or ++++'s
        if (marker != '/')
        {
            while (cur < stop && stop[-1] == marker)
                stop--;
        }

        for (; cur < stop; cur++)
        {
            ubyte ch      = *cur;
            int isNewline = 0;

            if (ch == '*' || ch == '+')
            {
                if (atLineStart && ch == marker)
                {
                    // Leading comment character of a line: discard it along
                    // with the blanks in front of it (back to the previous \n)
                    atLineStart = 0;
                    for (;;)
                    {
                        if (!text.offset)
                            break;
                        if (text.data[text.offset - 1] != ' ' && text.data[text.offset - 1] != '\t')
                            break;
                        text.offset--;
                    }
                    continue;
                }
                // otherwise an ordinary character; line-start state is kept
            }
            else if (ch == ' ' || ch == '\t')
            {
                // blanks are copied and do not end the line-start state
            }
            else if (ch == '\r')
            {
                if (cur[1] == '\n')
                    continue;                   // skip the \r of a \r\n pair
                ch        = '\n';               // lone \r counts as a newline
                isNewline = 1;
            }
            else if (ch == '\n')
            {
                isNewline = 1;
            }
            else if (ch == 226 && cur[1] == 128 && (cur[2] == 168 || cur[2] == 169))
            {
                // UTF-8 encoded LS or PS: swallow all three bytes
                cur      += 2;
                ch        = '\n';
                isNewline = 1;
            }
            else
            {
                atLineStart = 0;
            }

            if (isNewline)
            {
                atLineStart = 1;

                // Trim trailing whitespace of the line just finished
                for (;;)
                {
                    if (!text.offset)
                        break;
                    if (text.data[text.offset - 1] != ' ' && text.data[text.offset - 1] != '\t')
                        break;
                    text.offset--;
                }
            }
            text.write(ch);
        }

        // Always end with a newline
        if (!text.offset || text.data[text.offset - 1] != '\n')
            text.writenl();

        // It's a line comment if the start of the doc comment comes
        // after other non-whitespace on the same line.
        int storeAsLine  = lineComment && anyToken;
        char[] previous  = storeAsLine ? t.lineComment : t.blockComment;
        char[] collected = text.toString().dup;

        // Combine with previous doc comment, if any
        char[] merged = previous.length ? combineComments(previous, collected) : collected;

        if (storeAsLine)
            t.lineComment = merged;
        else
            t.blockComment = merged;
    }
}

// character maps: one bit-set of CM* flags per byte value
static ubyte[256] cmtable;

const int CMoctal  = 0x1;       // octal digit
const int CMhex    = 0x2;       // hexadecimal digit
const int CMidchar = 0x4;       // alphanumeric or '_'

/// Non-zero when cmtable marks c with CMoctal.
ubyte isoctal(ubyte c)
{
    return cmtable[c] & CMoctal;
}

/// Non-zero when cmtable marks c with CMhex.
ubyte ishex(ubyte c)
{
    return cmtable[c] & CMhex;
}

/// Non-zero when cmtable marks c with CMidchar.
ubyte isidchar(ubyte c)
{
    return cmtable[c] & CMidchar;
}

/// Fills cmtable with the CM* flags of every byte value.
static void cmtable_init()
{
    for (uint code = 0; code < cmtable.length; code++)
    {
        int flags = 0;

        if (code >= '0' && code <= '7')
            flags |= CMoctal;
        if (isdigit(code) || (code >= 'a' && code <= 'f') || (code >= 'A' && code <= 'F'))
            flags |= CMhex;
        if (isalnum(code) || code == '_')
            flags |= CMidchar;

        cmtable[code] |= flags;
    }
}


/+
   struct StringValue
   {
    union
    {
        int intvalue;
        void *ptrvalue;
        dchar *string;
    }

    char[] lstring;
   }
 #define CASE_BASIC_TYPES
        case TOKwchar: case TOKdchar:
        case TOKbit: case TOKbool: case TOKchar:
        case TOKint8: case TOKuns8:
        case TOKint16: case TOKuns16:
        case TOKint32: case TOKuns32:
        case TOKint64: case TOKuns64:
        case TOKfloat32: case TOKfloat64: case TOKfloat80:
        case TOKimaginary32: case TOKimaginary64: case TOKimaginary80:
        case TOKcomplex32: case TOKcomplex64: case TOKcomplex80:
        case TOKvoid:

 #define CASE_BASIC_TYPES_X(t)	\
        case TOKvoid: t = Type::tvoid; goto LabelX; \
        case TOKint8: t = Type::tint8; goto LabelX; \
        case TOKuns8: t = Type::tuns8; goto LabelX; \
        case TOKint16: t = Type::tint16; goto LabelX; \
        case TOKuns16: t = Type::tuns16; goto LabelX; \
        case TOKint32: t = Type::tint32; goto LabelX; \
        case TOKuns32: t = Type::tuns32; goto LabelX; \
        case TOKint64: t = Type::tint64; goto LabelX; \
        case TOKuns64: t = Type::tuns64; goto LabelX; \
        case TOKfloat32: t = Type::tfloat32; goto LabelX; \
        case TOKfloat64: t = 
Type::tfloat64; goto LabelX; \ - case TOKfloat80: t = Type::tfloat80; goto LabelX; \ - case TOKimaginary32: t = Type::timaginary32; goto LabelX; \ - case TOKimaginary64: t = Type::timaginary64; goto LabelX; \ - case TOKimaginary80: t = Type::timaginary80; goto LabelX; \ - case TOKcomplex32: t = Type::tcomplex32; goto LabelX; \ - case TOKcomplex64: t = Type::tcomplex64; goto LabelX; \ - case TOKcomplex80: t = Type::tcomplex80; goto LabelX; \ - case TOKbit: t = Type::tbit; goto LabelX; \ - case TOKchar: t = Type::tchar; goto LabelX; \ - case TOKwchar: t = Type::twchar; goto LabelX; \ - case TOKdchar: t = Type::tdchar; goto LabelX; \ - LabelX - +/ -- cgit v1.2.1