/*
 *  Copyright (c) 2002-2003 Jesper K. Pedersen <blackie@kde.org>
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License version 2 as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 **/
%option noyywrap

%{
  #include <limits.h>

  #include <qstring.h>

  #include "textrangeregexp.h"
  #include "gen_qregexpparser.h"
#ifdef QT_ONLY
  #include "compat.h"
#endif
  void parseRange( char* txt, int* min, int* max );
  RegExp* parseCharClass( char* match );
%}

Escape \\.
/* Named patterns used by the rules below. Rule order matters: flex picks
   the longest match and, on a tie, the rule listed first. */
BackRef \\[1-9][0-9]*
CharClass \[^?\]?[^]]*\]
Range \{[0-9]*(,[0-9]*)?\}
HexChar \\x[0-9a-fA-F]{1,4}
OctChar \\0[0-7]{1,4}
SpecialEsc \\[afnrtv]
%%
"\\b" return TOK_PosWordChar;
"\\B" return TOK_PosNonWordChar;
"\\d" {
  TextRangeRegExp* regexp = new TextRangeRegExp( false );
  regexp->setDigit( true );
  qregexplval.regexp = regexp;
  return TOK_CharClass;
}
"\\D" {
  TextRangeRegExp* regexp = new TextRangeRegExp( false );
  regexp->setNonDigit( true );
  qregexplval.regexp = regexp;
  return TOK_CharClass;
}
"\\s" {
  TextRangeRegExp* regexp = new TextRangeRegExp( false );
  regexp->setSpace( true );
  qregexplval.regexp = regexp;
  return TOK_CharClass;
}
"\\S" {
  TextRangeRegExp* regexp = new TextRangeRegExp( false );
  regexp->setNonSpace( true );
  qregexplval.regexp = regexp;
  return TOK_CharClass;
}
"\\w" {
  TextRangeRegExp* regexp = new TextRangeRegExp( false );
  regexp->setWordChar( true );
  qregexplval.regexp = regexp;
  return TOK_CharClass;
}
"\\W" {
  TextRangeRegExp* regexp = new TextRangeRegExp( false );
  regexp->setNonWordChar( true );
  qregexplval.regexp = regexp;
  return TOK_CharClass;
}
{SpecialEsc} {
  /* The escape is stored in its textual form, e.g. the two characters \n. */
  TextRangeRegExp* regexp = new TextRangeRegExp( false );
  regexp->addCharacter( QString::fromLocal8Bit( yytext ) );
  qregexplval.regexp = regexp;
  return TOK_CharClass;
}
{HexChar} {
  TextRangeRegExp* regexp = new TextRangeRegExp( false );
  regexp->addCharacter( QString::fromLocal8Bit(yytext) );
  qregexplval.regexp = regexp;
  return TOK_CharClass;
}
{OctChar} {
  TextRangeRegExp* regexp = new TextRangeRegExp( false );
  regexp->addCharacter( QString::fromLocal8Bit(yytext) );
  qregexplval.regexp = regexp;
  return TOK_CharClass;
}
"." return TOK_Dot;
"$" return TOK_Dollar;
"^" return TOK_Carat;
"(?:" return TOK_MagicLeftParent;
"(?=" return TOK_PosLookAhead;
"(?!" return TOK_NegLookAhead;
"(" return TOK_LeftParen;
")" return TOK_RightParent;
"|" return TOK_Bar;
"*" { qregexplval.range.min = 0; qregexplval.range.max=-1; return TOK_Quantifier; }
"?" { qregexplval.range.min = 0; qregexplval.range.max=1; return TOK_Quantifier; }
"+" { qregexplval.range.min = 1; qregexplval.range.max=-1; return TOK_Quantifier; }
{Range} {
  parseRange( yytext, &qregexplval.range.min, &qregexplval.range.max );
  return TOK_Quantifier;
}
{CharClass} {
  qregexplval.regexp = parseCharClass(yytext);
  return TOK_CharClass;
}
{BackRef} {
  /* Skip the leading backslash; the rest is the group number. */
  qregexplval.backRef = atoi( yytext+1 );
  return TOK_BackRef;
}
{Escape} {
  qregexplval.ch = yytext[1];
  return TOK_EscapeChar;
}
. {
  qregexplval.ch = yytext[0];
  return TOK_Char;
}
%%

/**
   Makes the lexer scan the text of qstr on subsequent calls to the
   scanning function. A null string is scanned as the empty string.

   The text is handed to the lexer as Latin-1 bytes. yy_scan_string()
   makes its own copy of the bytes, so the pointer obtained from qstr only
   needs to stay valid for the duration of this call.
*/
void setParseData( QString qstr )
{
  const char* cstr;
  if ( qstr.isNull() )
    cstr = "";
  else
    cstr = qstr.latin1();

  /* Every yy_scan_string() call allocates a new buffer. Release the one
     installed by the previous call (if any), otherwise it is leaked each
     time new text is set. */
  if ( YY_CURRENT_BUFFER )
    yy_delete_buffer( YY_CURRENT_BUFFER );

  yy_switch_to_buffer( yy_scan_string( cstr ) );
}

/**
   Reads a run of decimal digits from txt starting at index *pos.

   On return *pos is the index of the first character that is not a digit
   (possibly the terminating NUL) and *value holds the number read, or 0 if
   there were no digits. Values too large for an int are clamped to INT_MAX
   rather than overflowing, which would be undefined behaviour and could
   yield a negative count.

   Returns true if at least one digit was consumed.
*/
static bool readCount( const char* txt, int* pos, int* value )
{
  bool found = false;
  int result = 0;
  char c = txt[*pos];
  while ( c >= '0' && c <= '9' ) {
    const int digit = c - '0';
    if ( result > ( INT_MAX - digit ) / 10 )
      result = INT_MAX; /* would overflow: saturate */
    else
      result = result * 10 + digit;
    found = true;
    ++(*pos);
    c = txt[*pos];
  }
  *value = result;
  return found;
}

/**
   This function parses a range in a form similar to "{3,4}", "{,7}"
   etc. and returns the value in the integers pointed to by min and max.
   A max of -1 means that there is no upper bound.

   txt is expected to be a token matched by the Range pattern, i.e. it
   starts with '{'. Scanning stops at the first character that does not fit
   the format, so a malformed token cannot make the function read past the
   end of the string.
*/
void parseRange( char* txt, int* min, int* max )
{
  /*
    case  txt     min  max
    1     {}      0    -1
    2     {,}     0    -1
    3     {5}     5     5
    4     {5,}    5    -1
    5     {,7}    0     7
    6     {5,7}   5     7
  */
  int pos = 1; /* skip the opening '{' */
  int minimum = 0;
  int maximum = 0;

  const bool minFound = readCount( txt, &pos, &minimum );
  const bool commaFound = ( txt[pos] == ',' );
  bool maxFound = false;
  if ( commaFound ) {
    ++pos; /* skip the ',' */
    maxFound = readCount( txt, &pos, &maximum );
  }

  *min = minimum;
  if ( maxFound )
    *max = maximum; /* case 5,6 */
  else if ( minFound && !commaFound )
    *max = minimum; /* case 3 */
  else
    *max = -1;      /* case 1,2,4 */
}

/**
   This function parses a character range like "[^ab1-4]".
*/ RegExp* parseCharClass( char* match ) { TextRangeRegExp* res = new TextRangeRegExp( false ); QString txt = QString::fromLocal8Bit( match ); txt = txt.mid(1,txt.length()-2); unsigned int i = 0; QChar ch = txt.at(i++); QString pendingChar; QString thisChar; bool charPending = false; bool rangePending = false; bool flushPending = false; if ( ch == QChar('^') ) { res->setNegate( true ); ch = txt.at(i++); } do { // If a character is pending, and the next char is '-' then we are // possible looking at a range. if ( ch == QChar('-') && charPending ) { rangePending = true; ch = txt.at(i++); continue; } // If we have a pending character, but do not also have a pending // range, then the pending character was not part of a range, and // should therefore just be added as a single character. if ( charPending && !rangePending ) { res->addCharacter( pendingChar ); charPending = false; } if ( ch == QChar('\\') ) { // Handle the cases where an escape character is specified. ch = txt.at(i++); if ( ch == QChar('a') || ch == QChar('f') || ch == QChar('n') || ch == QChar('r') || ch == QChar('t') || ch == QChar('v') ) { // These are just seen as normal characters. thisChar = QString::fromLocal8Bit("\\") + ch; } else if ( ch == QChar('d') ) { // The following characters represent character groups. If any of // these are seen in a range, then the range is ignored, thus [a-\s] // matches an 'a', a '-', and a space (\s means space). 
res->setDigit( true ); flushPending = true; } else if ( ch == QChar('D') ) { res->setNonDigit( true ); flushPending = true; } else if ( ch == QChar('s') ) { res->setSpace( true ); flushPending = true; } else if ( ch == QChar('S') ) { res->setNonSpace( true ); flushPending = true; } else if ( ch == QChar('w') ) { res->setWordChar( true ); flushPending = true; } else if ( ch == QChar('W') ) { res->setNonWordChar( true ); flushPending = true; } else if ( ch == QChar('x') || ch == QChar('X') ) { // This is a hexidecimal character: \xHHHH QString str; for ( int j=0; j<4; j++) { ch = txt.at(i++); if ( ch == 'a' || ch == 'A' || ch == 'b' || ch == 'B' || ch == 'c' || ch == 'C' || ch == 'd' || ch == 'D' || ch == 'e' || ch == 'E' || ch == 'f' || ch == 'F' || ch == '0' || ch == '1' || ch == '2' || ch == '3' || ch == '4' || ch == '5' || ch == '6' || ch == '7' || ch == '8' || ch == '9' ) str += ch; else i--; } thisChar = QString::fromLocal8Bit("\\x") + str; } else if ( ch == QChar('0') ) { // This is an octal character QString str; for ( int j=0; j<4; j++) { ch = txt.at(i++); if ( ch == '0' || ch == '1' || ch == '2' || ch == '3' || ch == '4' || ch == '5' || ch == '6' || ch == '7' ) str += ch; else i--; } thisChar = QString::fromLocal8Bit("\\x") + str ; } else { // Anything else escaped just means the character itself. thisChar = ch; } } else { // A non escaped character. thisChar = ch; } // The characters \s,\S,\w,\W,\d or \D, can not be part of a range, // thus if they are meet in what looks like a range, then the // characters of the range is justed seen as normal non range // characters. thus [a-\s] matches an 'a', a '-', and a space (\s means // space). 
if ( flushPending ) { if ( charPending ) res->addCharacter( pendingChar ); if ( rangePending ) res->addCharacter( QString::fromLocal8Bit("-") ); flushPending = false; charPending = false; rangePending = false; } else { if ( rangePending ) { res->addRange( pendingChar, thisChar ); charPending = false; rangePending = false; } else { pendingChar = thisChar; charPending = true; } } ch = txt.at(i++); } while ( ch != QChar(']') && i <= txt.length() ); if ( charPending ) res->addCharacter( pendingChar ); if ( rangePending ) res->addCharacter( QString::fromLocal8Bit("-") ); return res; }