diff options
Diffstat (limited to 'tdehtml/xml/dom_stringimpl.cpp')
-rw-r--r-- | tdehtml/xml/dom_stringimpl.cpp | 460 |
1 files changed, 460 insertions, 0 deletions
diff --git a/tdehtml/xml/dom_stringimpl.cpp b/tdehtml/xml/dom_stringimpl.cpp new file mode 100644 index 000000000..12f1481c0 --- /dev/null +++ b/tdehtml/xml/dom_stringimpl.cpp @@ -0,0 +1,460 @@ +/** + * This file is part of the DOM implementation for KDE. + * + * Copyright (C) 1999-2003 Lars Knoll (knoll@kde.org) + * (C) 1999 Antti Koivisto (koivisto@kde.org) + * (C) 2001-2003 Dirk Mueller ( mueller@kde.org ) + * (C) 2002 Apple Computer, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#include "dom_stringimpl.h" + +#include <kdebug.h> + +#include <string.h> +#include <tqstringlist.h> + +using namespace DOM; +using namespace tdehtml; + + +DOMStringImpl::DOMStringImpl(const char *str) +{ + if(str && *str) + { + l = strlen(str); + s = QT_ALLOC_QCHAR_VEC( l ); + int i = l; + TQChar* ptr = s; + while( i-- ) + *ptr++ = *str++; + } + else + { + s = QT_ALLOC_QCHAR_VEC( 1 ); // crash protection + s[0] = 0x0; // == TQChar::null; + l = 0; + } +} + +// FIXME: should be a cached flag maybe. +bool DOMStringImpl::containsOnlyWhitespace() const +{ + if (!s) + return true; + + for (uint i = 0; i < l; i++) { + TQChar c = s[i]; + if (c.unicode() <= 0x7F) { + if (c.unicode() > ' ') + return false; + } else { + if (c.direction() != TQChar::DirWS) + return false; + } + } + return true; +} + + +void DOMStringImpl::append(DOMStringImpl *str) +{ + if(str && str->l != 0) + { + int newlen = l+str->l; + TQChar *c = QT_ALLOC_QCHAR_VEC(newlen); + memcpy(c, s, l*sizeof(TQChar)); + memcpy(c+l, str->s, str->l*sizeof(TQChar)); + if(s) QT_DELETE_QCHAR_VEC(s); + s = c; + l = newlen; + } +} + +void DOMStringImpl::insert(DOMStringImpl *str, unsigned int pos) +{ + if(pos > l) + { + append(str); + return; + } + if(str && str->l != 0) + { + int newlen = l+str->l; + TQChar *c = QT_ALLOC_QCHAR_VEC(newlen); + memcpy(c, s, pos*sizeof(TQChar)); + memcpy(c+pos, str->s, str->l*sizeof(TQChar)); + memcpy(c+pos+str->l, s+pos, (l-pos)*sizeof(TQChar)); + if(s) QT_DELETE_QCHAR_VEC(s); + s = c; + l = newlen; + } +} + +void DOMStringImpl::truncate(int len) +{ + if(len > (int)l) return; + + int nl = len < 1 ? 1 : len; + TQChar *c = QT_ALLOC_QCHAR_VEC(nl); + memcpy(c, s, nl*sizeof(TQChar)); + if(s) QT_DELETE_QCHAR_VEC(s); + s = c; + l = len; +} + +void DOMStringImpl::remove(unsigned int pos, int len) +{ + if(pos >= l ) return; + if(pos+len > l) + len = l - pos; + + uint newLen = l-len; + TQChar *c = QT_ALLOC_QCHAR_VEC(newLen); + memcpy(c, s, pos*sizeof(TQChar)); + memcpy(c+pos, s+pos+len, (l-len-pos)*sizeof(TQChar)); + if(s) QT_DELETE_QCHAR_VEC(s); + s = c; + l = newLen; +} + +DOMStringImpl *DOMStringImpl::split(unsigned int pos) +{ + if( pos >=l ) return new DOMStringImpl(); + + uint newLen = l-pos; + DOMStringImpl *str = new DOMStringImpl(s + pos, newLen); + truncate(pos); + return str; +} + +DOMStringImpl *DOMStringImpl::substring(unsigned int pos, unsigned int len) +{ + if( pos >=l ) return new DOMStringImpl(); + if(pos+len > l) + len = l - pos; + + return new DOMStringImpl(s + pos, len); +} + +// Collapses white-space according to CSS 2.1 rules +DOMStringImpl *DOMStringImpl::collapseWhiteSpace(bool preserveLF, bool preserveWS) +{ + if (preserveLF && preserveWS) return this; + + // Notice we are likely allocating more space than needed (worst case) + TQChar *n = QT_ALLOC_QCHAR_VEC(l); + + unsigned int pos = 0; + bool collapsing = false; // collapsing white-space + bool collapsingLF = false; // collapsing around linefeed + bool changedLF = false; + for(unsigned int i=0; i<l; i++) { + TQChar ch = s[i]; + + // We act on \r as we would on \n because CSS uses it to indicate new-line + if (ch == '\r') ch = '\n'; + else + // ### The XML parser lets \t through, for now treat them as spaces + if (ch == '\t') ch = ' '; + + if (!preserveLF && ch == '\n') { + // ### Not strictly correct according to CSS3 text-module. + // - In ideographic languages linefeed should be ignored + // - and in Thai and Khmer it should be treated as a zero-width space + ch = ' '; // Treat as space + changedLF = true; + } + + if (collapsing) { + if (ch == ' ') + continue; + if (ch == '\n') { + collapsingLF = true; + continue; + } + + n[pos++] = (collapsingLF) ? '\n' : ' '; + collapsing = false; + collapsingLF = false; + } + else + if (!preserveWS && ch == ' ') { + collapsing = true; + continue; + } + else + if (!preserveWS && ch == '\n') { + collapsing = true; + collapsingLF = true; + continue; + } + + n[pos++] = ch; + } + if (collapsing) + n[pos++] = ((collapsingLF) ? '\n' : ' '); + + if (pos == l && !changedLF) { + QT_DELETE_QCHAR_VEC(n); + return this; + } + else { + DOMStringImpl* out = new DOMStringImpl(); + out->s = n; + out->l = pos; + + return out; + } +} + +static Length parseLength(const TQChar *s, unsigned int l) +{ + if (l == 0) { + return Length(1, Relative); + } + + unsigned i = 0; + while (i < l && s[i].isSpace()) + ++i; + if (i < l && (s[i] == '+' || s[i] == '-')) + ++i; + while (i < l && s[i].isDigit()) + ++i; + + bool ok; + int r = TQConstString(s, i).string().toInt(&ok); + + /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */ + while (i < l && (s[i].isDigit() || s[i] == '.')) + ++i; + + /* IE Quirk: Skip any whitespace (20 % => 20%) */ + while (i < l && s[i].isSpace()) + ++i; + + if (ok) { + if (i == l) { + return Length(r, Fixed); + } else { + const TQChar* next = s+i; + + if (*next == '%') + return Length(r, Percent); + + if (*next == '*') + return Length(r, Relative); + } + return Length(r, Fixed); + } else { + if (i < l) { + const TQChar* next = s+i; + + if (*next == '*') + return Length(1, Relative); + + if (*next == '%') + return Length(1, Relative); + } + } + return Length(0, Relative); +} + +tdehtml::Length* DOMStringImpl::toCoordsArray(int& len) const +{ + TQString str(s, l); + for(unsigned int i=0; i < l; i++) { + TQChar cc = s[i]; + if (cc > TQChar('9') || (cc < TQChar('0') && cc != '-' && cc != '*' && cc != '.')) + str[i] = ' '; + } + str = str.simplifyWhiteSpace(); + + len = str.contains(' ') + 1; + tdehtml::Length* r = new tdehtml::Length[len]; + + int i = 0; + int pos = 0; + int pos2; + + while((pos2 = str.find(' ', pos)) != -1) { + r[i++] = parseLength((TQChar *) str.unicode()+pos, pos2-pos); + pos = pos2+1; + } + r[i] = parseLength((TQChar *) str.unicode()+pos, str.length()-pos); + + return r; +} + +tdehtml::Length* DOMStringImpl::toLengthArray(int& len) const +{ + TQString str(s, l); + str = str.simplifyWhiteSpace(); + + len = str.contains(',') + 1; + + // If we have no commas, we have no array. + if( len == 1 ) + return 0L; + + tdehtml::Length* r = new tdehtml::Length[len]; + + int i = 0; + int pos = 0; + int pos2; + + while((pos2 = str.find(',', pos)) != -1) { + r[i++] = parseLength((TQChar *) str.unicode()+pos, pos2-pos); + pos = pos2+1; + } + + /* IE Quirk: If the last comma is the last char skip it and reduce len by one */ + if (str.length()-pos > 0) + r[i] = parseLength((TQChar *) str.unicode()+pos, str.length()-pos); + else + len--; + + return r; +} + +bool DOMStringImpl::isLower() const +{ + unsigned int i; + for (i = 0; i < l; i++) + if (s[i].lower() != s[i]) + return false; + return true; +} + +DOMStringImpl *DOMStringImpl::lower() const +{ + DOMStringImpl *c = new DOMStringImpl; + if(!l) return c; + + c->s = QT_ALLOC_QCHAR_VEC(l); + c->l = l; + + for (unsigned int i = 0; i < l; i++) + c->s[i] = s[i].lower(); + + return c; +} + +DOMStringImpl *DOMStringImpl::upper() const +{ + DOMStringImpl *c = new DOMStringImpl; + if(!l) return c; + + c->s = QT_ALLOC_QCHAR_VEC(l); + c->l = l; + + for (unsigned int i = 0; i < l; i++) + c->s[i] = s[i].upper(); + + return c; +} + +DOMStringImpl *DOMStringImpl::capitalize(bool noFirstCap) const +{ + bool canCapitalize= !noFirstCap; + DOMStringImpl *c = new DOMStringImpl; + if(!l) return c; + + c->s = QT_ALLOC_QCHAR_VEC(l); + c->l = l; + + for (unsigned int i=0; i<l; i++) + { + if (s[i].isLetterOrNumber() && canCapitalize) + { + c->s[i]=s[i].upper(); + canCapitalize=false; + } + else + { + c->s[i]=s[i]; + if (s[i].isSpace()) + canCapitalize=true; + } + } + + return c; +} + +TQString DOMStringImpl::string() const +{ + return TQString(s, l); +} + +int DOMStringImpl::toInt(bool* ok) const +{ + // match \s*[+-]?\d* + unsigned i = 0; + while (i < l && s[i].isSpace()) + ++i; + if (i < l && (s[i] == '+' || s[i] == '-')) + ++i; + while (i < l && s[i].isDigit()) + ++i; + + return TQConstString(s, i).string().toInt(ok); +} + +static const unsigned short amp[] = {'&', 'a', 'm', 'p', ';'}; +static const unsigned short lt[] = {'&', 'l', 't', ';'}; +static const unsigned short gt[] = {'&', 'g', 't', ';'}; + +DOMStringImpl *DOMStringImpl::escapeHTML() +{ + unsigned outL = 0; + for (unsigned int i = 0; i < l; ++i ) { + if ( s[i] == '&' ) + outL += 5; //& + else if (s[i] == '<' || s[i] == '>') + outL += 4; //>/< + else + ++outL; + } + if (outL == l) + return this; + + + DOMStringImpl* toRet = new DOMStringImpl(); + toRet->s = QT_ALLOC_QCHAR_VEC(outL); + toRet->l = outL; + + unsigned outP = 0; + for (unsigned int i = 0; i < l; ++i ) { + if ( s[i] == '&' ) { + memcpy(&toRet->s[outP], amp, sizeof(amp)); + outP += 5; + } else if (s[i] == '<') { + memcpy(&toRet->s[outP], lt, sizeof(lt)); + outP += 4; + } else if (s[i] == '>') { + memcpy(&toRet->s[outP], gt, sizeof(gt)); + outP += 4; + } else { + toRet->s[outP] = s[i]; + ++outP; + } + } + return toRet; +} + |