/**
* This file is part of the DOM implementation for KDE.
*
* Copyright (C) 2000 Peter Kelly (pmk@post.com)
* Copyright (C) 2003 Apple Computer, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "xml_tokenizer.h"
#include "xml/dom_docimpl.h"
#include "xml/dom_textimpl.h"
#include "xml/dom_xmlimpl.h"
#include "html/html_tableimpl.h"
#include "html/html_headimpl.h"
#include "rendering/render_object.h"
#include "misc/htmltags.h"
#include "misc/htmlattrs.h"
#include "misc/loader.h"
#include "khtmlview.h"
#include "khtml_part.h"
#include
#include
#include
using namespace DOM;
using namespace khtml;
// Creates an empty incremental source: read position at zero, no character
// buffer yet, and the "finished" flag cleared.
XMLIncrementalSource::XMLIncrementalSource()
    : QXmlInputSource(),
      m_pos( 0 ),
      m_unicode( 0 ),
      m_finished( false )
{
}
// Deliberately a no-op: it only exists to replace the base class behavior.
// This source receives its data through setData() and appendXML().
void XMLIncrementalSource::fetchData()
{
}
// Hands out the next buffered character and advances the read position.
// Returns EndOfDocument once the source has been marked finished, and
// EndOfData while the read position is at or past the end of the buffer.
QChar XMLIncrementalSource::next()
{
    if ( m_finished )
        return QXmlInputSource::EndOfDocument;

    if ( m_pos >= m_data.length() )
        return QXmlInputSource::EndOfData;

    return m_unicode[m_pos++];
}
// Replaces the buffered text with str and rewinds the read position.
// The "finished" flag is cleared only when str actually contains data.
void XMLIncrementalSource::setData( const QString& str )
{
    m_pos = 0;
    m_data = str;
    m_unicode = m_data.unicode();

    if ( str.isEmpty() )
        return;
    m_finished = false;
}
void XMLIncrementalSource::setData( const QByteArray& data )
{
setData( fromRawData( data, true ) );
}
// Adds str to the end of the buffered text without touching the read position.
void XMLIncrementalSource::appendXML( const QString& str )
{
    m_data.append( str );
    // re-read the character pointer so next() indexes the updated string
    m_unicode = m_data.unicode();
}
// Returns the complete text buffered so far (including already consumed characters).
QString XMLIncrementalSource::data()
{
    return m_data;
}
// Sets the flag that makes next() report EndOfDocument instead of handing out data.
void XMLIncrementalSource::setFinished( bool finished )
{
    m_finished = finished;
}
// Builds a handler that populates _doc (optionally attaching nodes when a
// view is given). The document itself becomes the bottom of the node stack.
XMLHandler::XMLHandler(DocumentImpl *_doc, KHTMLView *_view)
    : errorLine(0)
{
    // errorCol is otherwise only assigned in fatalError(); give it a defined
    // value so that it is never read uninitialized
    errorCol = 0;
    m_doc = _doc;
    m_view = _view;
    pushNode( _doc );
}
// Nothing to release explicitly here.
XMLHandler::~XMLHandler()
{
}
// Makes node the new top of the node stack, i.e. the new current node.
void XMLHandler::pushNode( NodeImpl *node )
{
    m_nodes.push( node );
}
// Removes the top entry from the node stack and returns it.
NodeImpl *XMLHandler::popNode()
{
    return m_nodes.pop();
}
// Returns the node on top of the node stack without removing it.
NodeImpl *XMLHandler::currentNode() const
{
    return m_nodes.current();
}
// Returns the error text accumulated by fatalError() since startDocument().
QString XMLHandler::errorProtocol()
{
    return errorProt;
}
// SAX callback at the start of parsing: resets the parser state and clears
// any previously collected error text. Always returns true.
bool XMLHandler::startDocument()
{
    state = StateInit;
    errorProt = "";
    return true;
}
// SAX callback: records uri as the innermost binding for prefix.
// Bindings are kept per prefix as a stack so nested declarations can shadow
// outer ones. Always returns true.
bool XMLHandler::startPrefixMapping(const QString& prefix, const QString& uri)
{
    namespaceInfo[prefix].push(uri);
    return true;
}
bool XMLHandler::endPrefixMapping(const QString& prefix)
{
QValueStack& stack = namespaceInfo[prefix];
stack.pop();
if (stack.isEmpty())
namespaceInfo.remove(prefix);
return true;
}
// QXml does not resolve the namespaces of attributes in the same tag that
// precede the xmlns declaration. For a prefixed qname with an empty uri, this
// fills in the innermost uri recorded for that prefix (if any); in every other
// case uri is left untouched.
void XMLHandler::fixUpNSURI(QString& uri, const QString& qname)
{
    if (!uri.isEmpty())
        return;
    if (qname.find(':') == -1)
        return;

    QString prefix;
    QString localName;
    QXmlNamespaceSupport splitter;
    splitter.splitName(qname, prefix, localName);

    if (!namespaceInfo.contains(prefix))
        return;
    uri = namespaceInfo[prefix].top();
}
bool XMLHandler::startElement( const QString& namespaceURI, const QString& /*localName*/,
const QString& qName, const QXmlAttributes& atts )
{
if (currentNode()->nodeType() == Node::TEXT_NODE)
exitText();
DOMString nsURI;
if (!namespaceURI.isNull())
nsURI = DOMString(namespaceURI);
else
// No namespace declared, default to the no namespace
nsURI = DOMString("");
ElementImpl *newElement = m_doc->createElementNS(nsURI,qName);
if (!newElement)
return false;
int i;
for (i = 0; i < atts.length(); i++) {
int exceptioncode = 0;
QString uriString = atts.uri(i);
QString qnString = atts.qName(i);
fixUpNSURI(uriString, qnString);
DOMString uri(uriString);
DOMString qn(qnString);
DOMString val(atts.value(i));
newElement->setAttributeNS(uri, qn, val, exceptioncode);
if (exceptioncode) // exception setting attributes
return false;
}
if (newElement->id() == ID_SCRIPT || newElement->id() == makeId(xhtmlNamespace, ID_SCRIPT))
static_cast(newElement)->setCreatedByParser(true);
//this is tricky. in general the node doesn't have to attach to the one it's in. as far
//as standards go this is wrong, but there's literally thousands of documents where
//we see
. the following code is there for those cases.
//when we can't attach to the currently holding us node we try to attach to its parent
bool attached = false;
for ( NodeImpl *current = currentNode(); current; current = current->parent() ) {
attached = current->addChild( newElement );
if ( attached )
break;
}
if (attached) {
if (m_view && !newElement->attached() && !m_doc->hasPendingSheets())
newElement->attach();
pushNode( newElement );
return true;
}
else {
delete newElement;
return false;
}
// ### DOM spec states: "if there is no markup inside an element's content, the text is contained in a
// single object implementing the Text interface that is the only child of the element."... do we
// need to ensure that empty elements always have an empty text child?
}
bool XMLHandler::endElement( const QString& /*namespaceURI*/, const QString& /*localName*/, const QString& /*qName*/ )
{
if (currentNode()->nodeType() == Node::TEXT_NODE)
exitText();
NodeImpl *node = popNode();
if ( node ) {
node->close();
while ( currentNode() && currentNode()->implicitNode() ) //for the implicit HTMLTableSectionElementImpl
popNode()->close();
} else
return false;
return true;
}
bool XMLHandler::startCDATA()
{
if (currentNode()->nodeType() == Node::TEXT_NODE)
exitText();
NodeImpl *newNode = m_doc->createCDATASection(new DOMStringImpl(""));
if (currentNode()->addChild(newNode)) {
if (m_view && !newNode->attached() && !m_doc->hasPendingSheets())
newNode->attach();
pushNode( newNode );
return true;
}
else {
delete newNode;
return false;
}
}
bool XMLHandler::endCDATA()
{
popNode();
Q_ASSERT( currentNode() );
return currentNode();
}
// SAX callback for character data. Appends ch to the text or CDATA node on top
// of the node stack, opening a new text node through enterText() when neither
// is current.
// Returns false when appendData reports an exception, or when no text node
// could be opened and ch is not pure white-space.
bool XMLHandler::characters( const QString& ch )
{
// enterText() is only evaluated (and only creates a node) when the current
// node is neither a text nor a CDATA node, because of short-circuit evaluation
if (currentNode()->nodeType() == Node::TEXT_NODE ||
currentNode()->nodeType() == Node::CDATA_SECTION_NODE ||
enterText()) {
int exceptioncode = 0;
// NOTE(review): the static_cast target type is missing in this copy of the
// source; presumably a character-data node type providing appendData - confirm
static_cast(currentNode())->appendData(ch,exceptioncode);
if (exceptioncode)
return false;
return true;
}
else {
// Don't worry about white-space violating DTD
if (ch.stripWhiteSpace().isEmpty()) return true;
return false;
}
}
bool XMLHandler::comment(const QString & ch)
{
if (currentNode()->nodeType() == Node::TEXT_NODE)
exitText();
// ### handle exceptions
currentNode()->addChild(m_doc->createComment(new DOMStringImpl(ch.unicode(), ch.length())));
return true;
}
bool XMLHandler::processingInstruction(const QString &target, const QString &data)
{
if (currentNode()->nodeType() == Node::TEXT_NODE)
exitText();
// ### handle exceptions
ProcessingInstructionImpl *pi =
m_doc->createProcessingInstruction(target, new DOMStringImpl(data.unicode(), data.length()));
currentNode()->addChild(pi);
pi->checkStyleSheet();
return true;
}
// Returns the (translated) generic message used when a handler callback fails.
// ### Make better error-messages
QString XMLHandler::errorString()
{
    return i18n("the document is not in the correct file format");
}
// Error-handler callback for fatal parse errors: appends a translated
// description to the error protocol and remembers line and column of the
// error. Always returns false.
bool XMLHandler::fatalError( const QXmlParseException& exception )
{
    const int line = exception.lineNumber();
    const int column = exception.columnNumber();

    errorProt += i18n( "fatal parsing error: %1 in line %2, column %3" )
                 .arg( exception.message() )
                 .arg( line )
                 .arg( column );

    errorLine = line;
    errorCol = column;
    return false;
}
bool XMLHandler::enterText()
{
NodeImpl *newNode = m_doc->createTextNode("");
if (currentNode()->addChild(newNode)) {
pushNode( newNode );
return true;
}
else {
delete newNode;
return false;
}
}
void XMLHandler::exitText()
{
if ( m_view && !currentNode()->attached() && !m_doc->hasPendingSheets() )
currentNode()->attach();
popNode();
}
// Declaration callback for attribute declarations; currently ignores its
// arguments and always returns true.
// qt's xml parser (as of 2.2.3) does not currently give us values for type,
// valueDefault and value. When it does, we can store these somewhere and have
// default attributes on elements
bool XMLHandler::attributeDecl(const QString &/*eName*/, const QString &/*aName*/, const QString &/*type*/,
                               const QString &/*valueDefault*/, const QString &/*value*/)
{
    return true;
}
// Declaration callback for external entities; currently ignores its arguments
// and always returns true.
// ### insert these too - is there anything special we have to do here?
bool XMLHandler::externalEntityDecl(const QString &/*name*/, const QString &/*publicId*/, const QString &/*systemId*/)
{
    return true;
}
// Declaration callback for an internal entity: creates an EntityImpl called
// name with a single text child holding value and, when the document has a
// doctype, adds it to the doctype's entities map. Always returns true.
bool XMLHandler::internalEntityDecl(const QString &name, const QString &value)
{
EntityImpl *e = new EntityImpl(m_doc,name);
// ### further parse entities inside the value and add them as separate nodes (or entityreferences)?
e->addChild(m_doc->createTextNode(new DOMStringImpl(value.unicode(), value.length())));
// NOTE(review): without a doctype, e is not stored anywhere visible here -
// looks like a leak; confirm who owns the entity in that case
if (m_doc->doctype())
// NOTE(review): the static_cast target type is missing in this copy of the
// source; presumably a named-node-map type providing addNode - confirm
static_cast(m_doc->doctype()->entities())->addNode(e);
return true;
}
// DTD callback for notation declarations; currently ignores its arguments and
// always returns true.
// ### FIXME: notations are not recorded. The disabled draft created a
// NotationImpl(m_doc, name, publicId, systemId) and added it to the notations
// map of the document's doctype (when a doctype exists).
bool XMLHandler::notationDecl(const QString &/*name*/, const QString &/*publicId*/, const QString &/*systemId*/)
{
    return true;
}
// DTD callback for unparsed entity declarations; currently ignores its
// arguments and always returns true.
// ### not implemented
bool XMLHandler::unparsedEntityDecl(const QString &/*name*/, const QString &/*publicId*/,
                                    const QString &/*systemId*/, const QString &/*notationName*/)
{
    return true;
}
//------------------------------------------------------------------------------
// Sets up a tokenizer that fills _doc: the embedded handler is registered with
// the reader for content, lexical, error, declaration and DTD events, and the
// SAX "namespace-prefixes" feature is switched on.
XMLTokenizer::XMLTokenizer(DOM::DocumentImpl *_doc, KHTMLView *_view)
    : m_handler(_doc,_view)
{
    m_doc = _doc;
    m_view = _view;

    // no scripts collected or being loaded yet, and no parse errors so far
    m_scriptsIt = 0;
    m_cachedScript = 0;
    m_noErrors = true;

    // one handler object serves all reader callbacks
    m_reader.setContentHandler( &m_handler );
    m_reader.setLexicalHandler( &m_handler );
    m_reader.setErrorHandler( &m_handler );
    m_reader.setDeclHandler( &m_handler );
    m_reader.setDTDHandler( &m_handler );

    m_reader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
}
// Frees the script iterator and drops this tokenizer's reference to a cached
// script, if one is still set.
XMLTokenizer::~XMLTokenizer()
{
    // deleting a null pointer is a no-op, so no explicit check is needed
    delete m_scriptsIt;

    if (m_cachedScript)
        m_cachedScript->deref(this);
}
// Starts parsing on the incremental source. The second argument requests
// incremental mode; write() later continues with parseContinue().
void XMLTokenizer::begin()
{
    m_reader.parse( &m_source, true );
}
// Feeds str to the parser. With appendData the text is added to the buffered
// source (and ignored once a parse error has occurred); without it the source
// is replaced. The outcome of continuing the parse is stored in m_noErrors.
void XMLTokenizer::write( const TokenizerString &str, bool appendData )
{
    if ( appendData ) {
        if ( !m_noErrors )
            return;
        m_source.appendXML( str.toString() );
    } else {
        m_source.setData( str.toString() );
    }

    m_noErrors = m_reader.parseContinue();
}
// Marks the source as finished and announces the end of parsing.
// A final parseContinue() pass is intentionally disabled:
//   if ( m_noErrors )
//       m_noErrors = m_reader.parseContinue();
void XMLTokenizer::end()
{
    m_source.setFinished( true );
    emit finishedParsing();
}
void XMLTokenizer::finish()
{
m_source.setFinished( true );
if (!m_noErrors) {
// An error occurred during parsing of the code. Display an error page to the user (the DOM
// tree is created manually and includes an excerpt from the code where the error is located)
// ### for multiple error messages, display the code for each (can this happen?)
// Clear the document
int exceptioncode = 0;
while (m_doc->hasChildNodes())
static_cast(m_doc)->removeChild(m_doc->firstChild(),exceptioncode);
QString line, errorLocPtr;
if ( m_handler.errorLine ) {
QString xmlCode = m_source.data();
QTextIStream stream(&xmlCode);
for (unsigned long lineno = 0; lineno < m_handler.errorLine-1; lineno++)
stream.readLine();
line = stream.readLine();
for (unsigned long colno = 0; colno < m_handler.errorCol-1; colno++)
errorLocPtr += " ";
errorLocPtr += "^";
}
// Create elements for display
DocumentImpl *doc = m_doc;
NodeImpl *html = doc->createElementNS(XHTML_NAMESPACE,"html");
NodeImpl *body = doc->createElementNS(XHTML_NAMESPACE,"body");
NodeImpl *h1 = doc->createElementNS(XHTML_NAMESPACE,"h1");
NodeImpl *headingText = doc->createTextNode(i18n("XML parsing error"));
NodeImpl *errorText = doc->createTextNode(m_handler.errorProtocol());
NodeImpl *hr = 0;
NodeImpl *pre = 0;
NodeImpl *lineText = 0;
NodeImpl *errorLocText = 0;
if ( !line.isNull() ) {
hr = doc->createElementNS(XHTML_NAMESPACE,"hr");
pre = doc->createElementNS(XHTML_NAMESPACE,"pre");
lineText = doc->createTextNode(line+"\n");
errorLocText = doc->createTextNode(errorLocPtr);
}
// Construct DOM tree. We ignore exceptions as we assume they will not be thrown here (due to the
// fact we are using a known tag set)
doc->appendChild(html,exceptioncode);
html->appendChild(body,exceptioncode);
if ( body )
body->appendChild(h1,exceptioncode);
h1->appendChild(headingText,exceptioncode);
body->appendChild(errorText,exceptioncode);
body->appendChild(hr,exceptioncode);
body->appendChild(pre,exceptioncode);
if ( pre ) {
pre->appendChild(lineText,exceptioncode);
pre->appendChild(errorLocText,exceptioncode);
}
// Close the renderers so that they update their display correctly
// ### this should not be necessary, but requires changes in the rendering code...
h1->close();
if ( pre ) pre->close();
body->close();
m_doc->recalcStyle( NodeImpl::Inherit );
m_doc->updateRendering();
end();
}
else {
// Parsing was successful. Now locate all html