diff options
Diffstat (limited to 'khtml/xml/xml_tokenizer.cpp')
-rw-r--r-- | khtml/xml/xml_tokenizer.cpp | 609 |
1 files changed, 609 insertions, 0 deletions
diff --git a/khtml/xml/xml_tokenizer.cpp b/khtml/xml/xml_tokenizer.cpp new file mode 100644 index 000000000..5489b8c21 --- /dev/null +++ b/khtml/xml/xml_tokenizer.cpp @@ -0,0 +1,609 @@ +/** + * This file is part of the DOM implementation for KDE. + * + * Copyright (C) 2000 Peter Kelly (pmk@post.com) + * Copyright (C) 2003 Apple Computer, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + + +#include "xml_tokenizer.h" +#include "xml/dom_docimpl.h" +#include "xml/dom_textimpl.h" +#include "xml/dom_xmlimpl.h" +#include "html/html_tableimpl.h" +#include "html/html_headimpl.h" +#include "rendering/render_object.h" +#include "misc/htmltags.h" +#include "misc/htmlattrs.h" +#include "misc/loader.h" + +#include "khtmlview.h" +#include "khtml_part.h" +#include <qvariant.h> +#include <kdebug.h> +#include <klocale.h> + +using namespace DOM; +using namespace khtml; + +XMLIncrementalSource::XMLIncrementalSource() + : QXmlInputSource(), m_pos( 0 ), m_unicode( 0 ), + m_finished( false ) +{ +} + +void XMLIncrementalSource::fetchData() +{ + //just a dummy to overwrite default behavior +} + +QChar XMLIncrementalSource::next() +{ + if ( m_finished ) + return QXmlInputSource::EndOfDocument; + else if ( m_data.length() <= m_pos ) + return QXmlInputSource::EndOfData; + else + return m_unicode[m_pos++]; +} + +void XMLIncrementalSource::setData( const QString& str ) +{ + m_data = str; + m_unicode = m_data.unicode(); + m_pos = 0; + if ( !str.isEmpty() ) + m_finished = false; +} +void XMLIncrementalSource::setData( const QByteArray& data ) +{ + setData( fromRawData( data, true ) ); +} + +void XMLIncrementalSource::appendXML( const QString& str ) +{ + m_data += str; + m_unicode = m_data.unicode(); +} + +QString XMLIncrementalSource::data() +{ + return m_data; +} + +void XMLIncrementalSource::setFinished( bool finished ) +{ + m_finished = finished; +} + +XMLHandler::XMLHandler(DocumentImpl *_doc, KHTMLView *_view) + : errorLine(0) +{ + m_doc = _doc; + m_view = _view; + pushNode( _doc ); +} + +XMLHandler::~XMLHandler() +{ +} + +void XMLHandler::pushNode( NodeImpl *node ) +{ + m_nodes.push( node ); +} + +NodeImpl *XMLHandler::popNode() +{ + return m_nodes.pop(); +} + +NodeImpl *XMLHandler::currentNode() const +{ + return m_nodes.current(); +} + +QString XMLHandler::errorProtocol() +{ + return errorProt; +} + + +bool XMLHandler::startDocument() +{ + // at the beginning of parsing: do some initialization + errorProt = ""; + state = StateInit; + + return true; +} + +bool XMLHandler::startPrefixMapping(const QString& prefix, const QString& uri) +{ + namespaceInfo[prefix].push(uri); + return true; +} + +bool XMLHandler::endPrefixMapping(const QString& prefix) +{ + QValueStack<QString>& stack = namespaceInfo[prefix]; + stack.pop(); + if (stack.isEmpty()) + namespaceInfo.remove(prefix); + return true; +} + +void XMLHandler::fixUpNSURI(QString& uri, const QString& qname) +{ + /* QXml does not resolve the namespaces of attributes in the same + tag that preceed the xmlns declaration. This fixes up that case */ + if (uri.isEmpty() && qname.find(':') != -1) { + QXmlNamespaceSupport ns; + QString localName, prefix; + ns.splitName(qname, prefix, localName); + if (namespaceInfo.contains(prefix)) { + uri = namespaceInfo[prefix].top(); + } + } +} + +bool XMLHandler::startElement( const QString& namespaceURI, const QString& /*localName*/, + const QString& qName, const QXmlAttributes& atts ) +{ + if (currentNode()->nodeType() == Node::TEXT_NODE) + exitText(); + + DOMString nsURI; + if (!namespaceURI.isNull()) + nsURI = DOMString(namespaceURI); + else + // No namespace declared, default to the no namespace + nsURI = DOMString(""); + ElementImpl *newElement = m_doc->createElementNS(nsURI,qName); + if (!newElement) + return false; + int i; + for (i = 0; i < atts.length(); i++) { + int exceptioncode = 0; + QString uriString = atts.uri(i); + QString qnString = atts.qName(i); + fixUpNSURI(uriString, qnString); + DOMString uri(uriString); + DOMString qn(qnString); + DOMString val(atts.value(i)); + newElement->setAttributeNS(uri, qn, val, exceptioncode); + if (exceptioncode) // exception setting attributes + return false; + } + + if (newElement->id() == ID_SCRIPT || newElement->id() == makeId(xhtmlNamespace, ID_SCRIPT)) + static_cast<HTMLScriptElementImpl *>(newElement)->setCreatedByParser(true); + + //this is tricky. in general the node doesn't have to attach to the one it's in. as far + //as standards go this is wrong, but there's literally thousands of documents where + //we see <p><ul>...</ul></p>. the following code is there for those cases. + //when we can't attach to the currently holding us node we try to attach to its parent + bool attached = false; + for ( NodeImpl *current = currentNode(); current; current = current->parent() ) { + attached = current->addChild( newElement ); + if ( attached ) + break; + } + if (attached) { + if (m_view && !newElement->attached() && !m_doc->hasPendingSheets()) + newElement->attach(); + pushNode( newElement ); + return true; + } + else { + delete newElement; + return false; + } + + // ### DOM spec states: "if there is no markup inside an element's content, the text is contained in a + // single object implementing the Text interface that is the only child of the element."... do we + // need to ensure that empty elements always have an empty text child? +} + + +bool XMLHandler::endElement( const QString& /*namespaceURI*/, const QString& /*localName*/, const QString& /*qName*/ ) +{ + if (currentNode()->nodeType() == Node::TEXT_NODE) + exitText(); + + NodeImpl *node = popNode(); + if ( node ) { + node->close(); + while ( currentNode() && currentNode()->implicitNode() ) //for the implicit HTMLTableSectionElementImpl + popNode()->close(); + } else + return false; + + return true; +} + + +bool XMLHandler::startCDATA() +{ + if (currentNode()->nodeType() == Node::TEXT_NODE) + exitText(); + + NodeImpl *newNode = m_doc->createCDATASection(new DOMStringImpl("")); + if (currentNode()->addChild(newNode)) { + if (m_view && !newNode->attached() && !m_doc->hasPendingSheets()) + newNode->attach(); + pushNode( newNode ); + return true; + } + else { + delete newNode; + return false; + } + +} + +bool XMLHandler::endCDATA() +{ + popNode(); + Q_ASSERT( currentNode() ); + return currentNode(); +} + +bool XMLHandler::characters( const QString& ch ) +{ + if (currentNode()->nodeType() == Node::TEXT_NODE || + currentNode()->nodeType() == Node::CDATA_SECTION_NODE || + enterText()) { + int exceptioncode = 0; + static_cast<TextImpl*>(currentNode())->appendData(ch,exceptioncode); + if (exceptioncode) + return false; + return true; + } + else { + // Don't worry about white-space violating DTD + if (ch.stripWhiteSpace().isEmpty()) return true; + + return false; + } + +} + +bool XMLHandler::comment(const QString & ch) +{ + if (currentNode()->nodeType() == Node::TEXT_NODE) + exitText(); + // ### handle exceptions + currentNode()->addChild(m_doc->createComment(new DOMStringImpl(ch.unicode(), ch.length()))); + return true; +} + +bool XMLHandler::processingInstruction(const QString &target, const QString &data) +{ + if (currentNode()->nodeType() == Node::TEXT_NODE) + exitText(); + // ### handle exceptions + ProcessingInstructionImpl *pi = + m_doc->createProcessingInstruction(target, new DOMStringImpl(data.unicode(), data.length())); + currentNode()->addChild(pi); + pi->checkStyleSheet(); + return true; +} + + +QString XMLHandler::errorString() +{ + // ### Make better error-messages + return i18n("the document is not in the correct file format"); +} + + +bool XMLHandler::fatalError( const QXmlParseException& exception ) +{ + errorProt += i18n( "fatal parsing error: %1 in line %2, column %3" ) + .arg( exception.message() ) + .arg( exception.lineNumber() ) + .arg( exception.columnNumber() ); + + errorLine = exception.lineNumber(); + errorCol = exception.columnNumber(); + + return false; +} + +bool XMLHandler::enterText() +{ + NodeImpl *newNode = m_doc->createTextNode(""); + if (currentNode()->addChild(newNode)) { + pushNode( newNode ); + return true; + } + else { + delete newNode; + return false; + } +} + +void XMLHandler::exitText() +{ + if ( m_view && !currentNode()->attached() && !m_doc->hasPendingSheets() ) + currentNode()->attach(); + popNode(); +} + +bool XMLHandler::attributeDecl(const QString &/*eName*/, const QString &/*aName*/, const QString &/*type*/, + const QString &/*valueDefault*/, const QString &/*value*/) +{ + // qt's xml parser (as of 2.2.3) does not currently give us values for type, valueDefault and + // value. When it does, we can store these somewhere and have default attributes on elements + return true; +} + +bool XMLHandler::externalEntityDecl(const QString &/*name*/, const QString &/*publicId*/, const QString &/*systemId*/) +{ + // ### insert these too - is there anything special we have to do here? + return true; +} + +bool XMLHandler::internalEntityDecl(const QString &name, const QString &value) +{ + EntityImpl *e = new EntityImpl(m_doc,name); + // ### further parse entities inside the value and add them as separate nodes (or entityreferences)? + e->addChild(m_doc->createTextNode(new DOMStringImpl(value.unicode(), value.length()))); + if (m_doc->doctype()) + static_cast<GenericRONamedNodeMapImpl*>(m_doc->doctype()->entities())->addNode(e); + return true; +} + +bool XMLHandler::notationDecl(const QString &/*name*/, const QString &/*publicId*/, const QString &/*systemId*/) +{ +// ### FIXME +// if (m_doc->document()->doctype()) { +// NotationImpl *n = new NotationImpl(m_doc,name,publicId,systemId); +// static_cast<GenericRONamedNodeMapImpl*>(m_doc->document()->doctype()->notations())->addNode(n); +// } + return true; +} + +bool XMLHandler::unparsedEntityDecl(const QString &/*name*/, const QString &/*publicId*/, + const QString &/*systemId*/, const QString &/*notationName*/) +{ + // ### + return true; +} + + +//------------------------------------------------------------------------------ + +XMLTokenizer::XMLTokenizer(DOM::DocumentImpl *_doc, KHTMLView *_view) + : m_handler(_doc,_view) +{ + m_doc = _doc; + m_view = _view; + m_scriptsIt = 0; + m_cachedScript = 0; + m_noErrors = true; + m_reader.setContentHandler( &m_handler ); + m_reader.setLexicalHandler( &m_handler ); + m_reader.setErrorHandler( &m_handler ); + m_reader.setDeclHandler( &m_handler ); + m_reader.setDTDHandler( &m_handler ); + m_reader.setFeature("http://xml.org/sax/features/namespace-prefixes", true); +} + +XMLTokenizer::~XMLTokenizer() +{ + if (m_scriptsIt) + delete m_scriptsIt; + if (m_cachedScript) + m_cachedScript->deref(this); +} + + +void XMLTokenizer::begin() +{ + // parse xml file + m_reader.parse( &m_source, true ); +} + +void XMLTokenizer::write( const TokenizerString &str, bool appendData ) +{ + if ( !m_noErrors && appendData ) + return; + if ( appendData ) { + m_source.appendXML( str.toString() ); + + } else { + m_source.setData( str.toString() ); + } + m_noErrors = m_reader.parseContinue(); +} + +void XMLTokenizer::end() +{ + m_source.setFinished( true ); + //if ( m_noErrors ) + //m_noErrors = m_reader.parseContinue(); + emit finishedParsing(); +} + +void XMLTokenizer::finish() +{ + m_source.setFinished( true ); + if (!m_noErrors) { + // An error occurred during parsing of the code. Display an error page to the user (the DOM + // tree is created manually and includes an excerpt from the code where the error is located) + + // ### for multiple error messages, display the code for each (can this happen?) + + // Clear the document + int exceptioncode = 0; + while (m_doc->hasChildNodes()) + static_cast<NodeImpl*>(m_doc)->removeChild(m_doc->firstChild(),exceptioncode); + + QString line, errorLocPtr; + if ( m_handler.errorLine ) { + QString xmlCode = m_source.data(); + QTextIStream stream(&xmlCode); + for (unsigned long lineno = 0; lineno < m_handler.errorLine-1; lineno++) + stream.readLine(); + line = stream.readLine(); + + for (unsigned long colno = 0; colno < m_handler.errorCol-1; colno++) + errorLocPtr += " "; + errorLocPtr += "^"; + } + + // Create elements for display + DocumentImpl *doc = m_doc; + NodeImpl *html = doc->createElementNS(XHTML_NAMESPACE,"html"); + NodeImpl *body = doc->createElementNS(XHTML_NAMESPACE,"body"); + NodeImpl *h1 = doc->createElementNS(XHTML_NAMESPACE,"h1"); + NodeImpl *headingText = doc->createTextNode(i18n("XML parsing error")); + NodeImpl *errorText = doc->createTextNode(m_handler.errorProtocol()); + NodeImpl *hr = 0; + NodeImpl *pre = 0; + NodeImpl *lineText = 0; + NodeImpl *errorLocText = 0; + if ( !line.isNull() ) { + hr = doc->createElementNS(XHTML_NAMESPACE,"hr"); + pre = doc->createElementNS(XHTML_NAMESPACE,"pre"); + lineText = doc->createTextNode(line+"\n"); + errorLocText = doc->createTextNode(errorLocPtr); + } + + // Construct DOM tree. We ignore exceptions as we assume they will not be thrown here (due to the + // fact we are using a known tag set) + doc->appendChild(html,exceptioncode); + html->appendChild(body,exceptioncode); + if ( body ) + body->appendChild(h1,exceptioncode); + h1->appendChild(headingText,exceptioncode); + body->appendChild(errorText,exceptioncode); + body->appendChild(hr,exceptioncode); + body->appendChild(pre,exceptioncode); + if ( pre ) { + pre->appendChild(lineText,exceptioncode); + pre->appendChild(errorLocText,exceptioncode); + } + + // Close the renderers so that they update their display correctly + // ### this should not be necessary, but requires changes in the rendering code... + h1->close(); + if ( pre ) pre->close(); + body->close(); + + m_doc->recalcStyle( NodeImpl::Inherit ); + m_doc->updateRendering(); + + end(); + } + else { + // Parsing was successful. Now locate all html <script> tags in the document and execute them + // one by one + addScripts(m_doc); + m_scriptsIt = new QPtrListIterator<HTMLScriptElementImpl>(m_scripts); + executeScripts(); + } + +} + +void XMLTokenizer::addScripts(NodeImpl *n) +{ + // Recursively go through the entire document tree, looking for html <script> tags. For each of these + // that is found, add it to the m_scripts list from which they will be executed + + if (n->id() == ID_SCRIPT || n->id() == makeId(xhtmlNamespace, ID_SCRIPT)) { + m_scripts.append(static_cast<HTMLScriptElementImpl*>(n)); + } + + NodeImpl *child; + for (child = n->firstChild(); child; child = child->nextSibling()) + addScripts(child); +} + +void XMLTokenizer::executeScripts() +{ + // Iterate through all of the html <script> tags in the document. For those that have a src attribute, + // start loading the script and return (executeScripts() will be called again once the script is loaded + // and continue where it left off). For scripts that don't have a src attribute, execute the code + // inside the tag + while (m_scriptsIt->current()) { + DOMString scriptSrc = m_scriptsIt->current()->getAttribute(ATTR_SRC); + QString charset = m_scriptsIt->current()->getAttribute(ATTR_CHARSET).string(); + + if (!scriptSrc.isEmpty()) { + // we have a src attribute + m_cachedScript = m_doc->docLoader()->requestScript(scriptSrc, charset); + ++(*m_scriptsIt); + if (m_cachedScript) { + m_cachedScript->ref(this); // will call executeScripts() again if already cached + return; + } + } + else { + // no src attribute - execute from contents of tag + QString scriptCode = ""; + NodeImpl *child; + for (child = m_scriptsIt->current()->firstChild(); child; child = child->nextSibling()) { + if ( ( child->nodeType() == Node::TEXT_NODE || child->nodeType() == Node::CDATA_SECTION_NODE) && + static_cast<TextImpl*>(child)->string() ) + scriptCode += QConstString(static_cast<TextImpl*>(child)->string()->s, + static_cast<TextImpl*>(child)->string()->l).string(); + } + // the script cannot do document.write until we support incremental parsing + // ### handle the case where the script deletes the node or redirects to + // another page, etc. (also in notifyFinished()) + // ### the script may add another script node after this one which should be executed + if (m_view) { + m_view->part()->executeScript(DOM::Node(), scriptCode); + } + ++(*m_scriptsIt); + } + } + + // All scripts have finished executing, so calculate the style for the document and close + // the last element + m_doc->updateStyleSelector(); + + // We are now finished parsing + end(); +} + +void XMLTokenizer::notifyFinished(CachedObject *finishedObj) +{ + // This is called when a script has finished loading that was requested from executeScripts(). We execute + // the script, and then call executeScripts() again to continue iterating through the list of scripts in + // the document + if (finishedObj == m_cachedScript) { + DOMString scriptSource = m_cachedScript->script(); + m_cachedScript->deref(this); + m_cachedScript = 0; + if (m_view) + m_view->part()->executeScript(DOM::Node(), scriptSource.string()); + executeScripts(); + } +} + +bool XMLTokenizer::isWaitingForScripts() const +{ + return m_cachedScript != 0; +} + +#include "xml_tokenizer.moc" + |