/***************************************************************************
                           pseudoDtd.cpp
    copyright            : (C) 2001-2002 by Daniel Naber
    email                : daniel.naber@t-online.de
 ***************************************************************************/

/***************************************************************************
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or ( at your option ) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 ***************************************************************************/

#include "pseudo_dtd.h"

#include <assert.h>

#include <tqdom.h>
#include <tqregexp.h>

#include <tdelocale.h>
#include <tdemessagebox.h>

/**
 * Create an empty pseudo DTD. All lookups are case-insensitive until
 * m_sgmlSupport becomes configurable.
 */
PseudoDTD::PseudoDTD()
{
  // "SGML support" only means case-insensitivity, because HTML is
  // case-insensitive up to version 4:
  m_sgmlSupport = true;  // TODO: make this a run-time option ( maybe automatically set )
}

PseudoDTD::~PseudoDTD()
{
}

/**
 * Parse a meta DTD (the XML output of dtdparse) and fill the lookup tables
 * for entities, elements, attributes and attribute values.
 *
 * An error box is shown and the function returns early if the content is not
 * well-formed XML or if its doctype name is not "dtd". The function also
 * returns early (leaving the tables partially filled) if the user cancels
 * the progress dialog.
 *
 * @param metaDtdUrl location of the meta DTD; only used in error messages
 * @param metaDtd    the XML content of the meta DTD
 */
void PseudoDTD::analyzeDTD( TQString &metaDtdUrl, TQString &metaDtd )
{
  TQDomDocument doc( "dtdIn_xml" );
  if ( ! doc.setContent( metaDtd) )
  {
    KMessageBox::error(0, i18n("The file '%1' could not be parsed. "
        "Please check that the file is well-formed XML.").arg( metaDtdUrl ),
        i18n( "XML Plugin Error") );
    return;
  }

  if ( doc.doctype().name() != "dtd" )
  {
    KMessageBox::error(0, i18n("The file '%1' is not in the expected format. "
        "Please check that the file is of this type:\n"
        "-//Norman Walsh//DTD DTDParse V2.0//EN\n"
        "You can produce such files with dtdparse. "
        "See the Kate Plugin documentation for more information.").arg( metaDtdUrl ),
        i18n("XML Plugin Error") );
    return;
  }

  // Total number of steps reported to the progress dialog:
  uint listLength = 0;
  listLength += doc.elementsByTagName( "entity" ).count();
  listLength += doc.elementsByTagName( "element" ).count();
  // count this twice, as it will be iterated twice ( TODO: optimize that? ):
  listLength += doc.elementsByTagName( "attlist" ).count() * 2;

  TQProgressDialog progress( i18n("Analyzing meta DTD..."), i18n("Cancel"), listLength,
      0, "progress", TRUE );
  progress.setMinimumDuration( 400 );
  progress.setProgress(0);

  // Get information from meta DTD and put it in TQt data structures for fast access:
  if( ! parseEntities( &doc, &progress ) )
    return;

  if( ! parseElements( &doc, &progress ) )
    return;

  if( ! parseAttributes( &doc, &progress ) )
    return;

  if( ! parseAttributeValues( &doc, &progress ) )
    return;

  progress.setProgress( listLength );  // just to make sure the dialog disappears
}

// ========================================================================
// DOM stuff:

/**
 * Iterate through the XML to get a mapping which sub-elements are allowed for
 * all elements.
 *
 * @param doc      the parsed meta DTD
 * @param progress progress dialog, advanced by one step per <element> node
 * @return false if the user cancelled the progress dialog, true otherwise
 */
bool PseudoDTD::parseElements( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_elementsList.clear();
  // We only display a list, i.e. we pretend that the content model is just
  // a set, so we use a map. This is necessary e.g. for xhtml 1.0's head element,
  // which would otherwise display some elements twice.
  TQMap<TQString,bool> subelementList;    // the bool is not used

  TQDomNodeList list = doc->elementsByTagName( "element" );
  uint listLength = list.count();  // speedup (really!)

  for( uint i = 0; i < listLength; i++ )
  {
    if( progress->wasCancelled() )
      return false;

    progress->setProgress( progress->progress()+1 );
    // FIXME!:
    //tqApp->processEvents();

    subelementList.clear();
    TQDomNode node = list.item( i );
    TQDomElement elem = node.toElement();

    if( !elem.isNull() )
    {
      // Enter the expanded content model, which may also include stuff not allowed.
      // We do not care if it's a <sequence-group> or whatever.
      TQDomNodeList contentModelList = elem.elementsByTagName( "content-model-expanded" );
      TQDomNode contentModelNode = contentModelList.item(0);
      TQDomElement contentModelElem = contentModelNode.toElement();
      if( ! contentModelElem.isNull() )
      {
        // Note: <pcdata/> children are currently not evaluated.

        // collect the allowed sub elements:
        TQDomNodeList subList = contentModelElem.elementsByTagName( "element-name" );
        uint subListLength = subList.count();
        for( uint l = 0; l < subListLength; l++ )
        {
          TQDomNode subNode = subList.item(l);
          TQDomElement subElem = subNode.toElement();
          if( !subElem.isNull() )
            subelementList[subElem.attribute( "name" )] = true;
        }

        // anders: check if this is an EMPTY element, and put "__EMPTY" in the
        // sub list, so that we can insert tags in empty form if required.
        TQDomNodeList emptyList = elem.elementsByTagName( "empty" );
        if ( emptyList.count() )
          subelementList["__EMPTY"] = true;
      }

      // Now remove the elements not allowed (e.g. <a> is explicitly not allowed in <a>
      // in the HTML 4.01 Strict DTD):
      TQDomNodeList exclusionsList = elem.elementsByTagName( "exclusions" );
      if( exclusionsList.length() > 0 )
      {
        // sometimes there are no exclusions ( e.g. in XML DTDs there are never exclusions )
        TQDomNode exclusionsNode = exclusionsList.item(0);
        TQDomElement exclusionsElem = exclusionsNode.toElement();
        if( ! exclusionsElem.isNull() )
        {
          TQDomNodeList subList = exclusionsElem.elementsByTagName( "element-name" );
          uint subListLength = subList.count();
          for( uint l = 0; l < subListLength; l++ )
          {
            TQDomNode subNode = subList.item(l);
            TQDomElement subElem = subNode.toElement();
            if( !subElem.isNull() )
            {
              TQMap<TQString,bool>::Iterator it = subelementList.find( subElem.attribute( "name" ) );
              if( it != subelementList.end() )
                subelementList.remove(it);
            }
          }
        }
      }

      // turn the map into a list:
      TQStringList subelementListTmp;
      TQMap<TQString,bool>::Iterator it;
      for( it = subelementList.begin(); it != subelementList.end(); ++it )
        subelementListTmp.append( it.key() );

      m_elementsList.insert( elem.attribute( "name" ), subelementListTmp );
    }
  } // end iteration over all <element> nodes

  return true;
}

/**
 * Check which elements are allowed inside a parent element. This returns
 * a list of allowed elements, but it doesn't care about order or if only a certain
 * number of occurrences is allowed.
 *
 * @param parentElement name of the parent element (compared case-insensitively
 *                      when SGML support is enabled)
 * @return the allowed sub elements, or an empty list if the element is unknown
 */
TQStringList PseudoDTD::allowedElements( TQString parentElement )
{
  if( m_sgmlSupport )
  {
    // find the matching element, ignoring case:
    const TQString wanted = parentElement.lower();
    TQMap<TQString,TQStringList>::Iterator it;
    for( it = m_elementsList.begin(); it != m_elementsList.end(); ++it )
    {
      if( it.key().lower() == wanted )
        return it.data();
    }
  }
  else if( m_elementsList.contains(parentElement) )
    return m_elementsList[parentElement];

  return TQStringList();
}

/**
 * Iterate through the XML to get a mapping which attributes are allowed inside
 * all elements.
 *
 * @param doc      the parsed meta DTD
 * @param progress progress dialog, advanced by one step per <attlist> node
 * @return false if the user cancelled the progress dialog, true otherwise
 */
bool PseudoDTD::parseAttributes( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_attributesList.clear();
  // TQStringList allowedAttributes;
  TQDomNodeList list = doc->elementsByTagName( "attlist" );
  uint listLength = list.count();

  for( uint i = 0; i < listLength; i++ )
  {
    if( progress->wasCancelled() )
      return false;

    progress->setProgress( progress->progress()+1 );
    // FIXME!!
    //tqApp->processEvents();

    ElementAttributes attrs;
    TQDomNode node = list.item(i);
    TQDomElement elem = node.toElement();
    if( !elem.isNull() )
    {
      TQDomNodeList attributeList = elem.elementsByTagName( "attribute" );
      uint attributeListLength = attributeList.count();
      for( uint l = 0; l < attributeListLength; l++ )
      {
        TQDomNode attributeNode = attributeList.item(l);
        TQDomElement attributeElem = attributeNode.toElement();

        if( ! attributeElem.isNull() )
        {
          // Attributes typed "#REQUIRED" are kept apart from all others:
          if ( attributeElem.attribute("type") == "#REQUIRED" )
            attrs.requiredAttributes.append( attributeElem.attribute("name") );
          else
            attrs.optionalAttributes.append( attributeElem.attribute("name") );
        }
      }
      m_attributesList.insert( elem.attribute("name"), attrs );
    }
  }

  return true;
}

/**
 * Check which attributes are allowed for an element.
 *
 * @param element element name (compared case-insensitively when SGML support
 *                is enabled)
 * @return optional attributes followed by required attributes, or an empty
 *         list if the element is unknown
 */
TQStringList PseudoDTD::allowedAttributes( TQString element )
{
  if( m_sgmlSupport )
  {
    // find the matching element, ignoring case:
    const TQString wanted = element.lower();
    TQMap<TQString,ElementAttributes>::Iterator it;
    for( it = m_attributesList.begin(); it != m_attributesList.end(); ++it )
    {
      if( it.key().lower() == wanted )
        return it.data().optionalAttributes + it.data().requiredAttributes;
    }
  }
  else if( m_attributesList.contains(element) )
    return m_attributesList[element].optionalAttributes + m_attributesList[element].requiredAttributes;

  return TQStringList();
}

/**
 * Get the attributes that are required for an element.
 *
 * @param element element name (compared case-insensitively when SGML support
 *                is enabled)
 * @return the required attributes, or an empty list if the element is unknown
 */
TQStringList PseudoDTD::requiredAttributes( const TQString &element ) const
{
  if ( m_sgmlSupport )
  {
    const TQString wanted = element.lower();
    TQMap<TQString,ElementAttributes>::ConstIterator it;
    for( it = m_attributesList.begin(); it != m_attributesList.end(); ++it )
    {
      if( it.key().lower() == wanted )
        return it.data().requiredAttributes;
    }
  }
  else if( m_attributesList.contains(element) )
    return m_attributesList[element].requiredAttributes;

  return TQStringList();
}

/**
 * Iterate through the XML to get a mapping which attribute values are allowed
 * for all attributes inside all elements.
 *
 * @param doc      the parsed meta DTD
 * @param progress progress dialog, advanced by one step per <attlist> node
 * @return false if the user cancelled the progress dialog, true otherwise
 */
bool PseudoDTD::parseAttributeValues( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_attributevaluesList.clear();                      // 1 element : n possible attributes
  TQMap<TQString,TQStringList> attributevaluesTmp;    // 1 attribute : n possible values
  TQDomNodeList list = doc->elementsByTagName( "attlist" );
  uint listLength = list.count();

  for( uint i = 0; i < listLength; i++ )
  {
    if( progress->wasCancelled() )
      return false;

    progress->setProgress( progress->progress()+1 );
    // FIXME!
    //tqApp->processEvents();

    attributevaluesTmp.clear();
    TQDomNode node = list.item(i);
    TQDomElement elem = node.toElement();
    if( !elem.isNull() )
    {
      // Enter the list of <attribute>:
      TQDomNodeList attributeList = elem.elementsByTagName( "attribute" );
      uint attributeListLength = attributeList.count();
      for( uint l = 0; l < attributeListLength; l++ )
      {
        TQDomNode attributeNode = attributeList.item(l);
        TQDomElement attributeElem = attributeNode.toElement();
        if( ! attributeElem.isNull() )
        {
          // The "value" attribute holds the possible values separated by spaces:
          TQString value = attributeElem.attribute( "value" );
          attributevaluesTmp.insert( attributeElem.attribute("name"),
              TQStringList::split(TQRegExp(" "), value) );
        }
      }
      m_attributevaluesList.insert( elem.attribute("name"), attributevaluesTmp );
    }
  }

  return true;
}

/**
 * Check which attributes values are allowed for an attribute in an element
 * (the element is necessary because e.g. "href" inside <a> could be different
 * to an "href" inside <link>):
 *
 * @param element   element name
 * @param attribute attribute name
 * @return the predefined values, or an empty list if none are available
 */
TQStringList PseudoDTD::attributeValues( TQString element, TQString attribute )
{
  // Direct access would be faster than iteration of course but not always correct,
  // because we need to be case-insensitive.
  if( m_sgmlSupport )
  {
    const TQString wantedElement = element.lower();
    const TQString wantedAttribute = attribute.lower();

    // first find the matching element, ignoring case:
    TQMap< TQString,TQMap<TQString,TQStringList> >::Iterator it;
    for( it = m_attributevaluesList.begin(); it != m_attributevaluesList.end(); ++it )
    {
      if( it.key().lower() == wantedElement )
      {
        // Bind by reference: there is no need to copy the inner map for a lookup.
        const TQMap<TQString,TQStringList> &attrVals = it.data();
        TQMap<TQString,TQStringList>::ConstIterator itV;
        // then find the matching attribute for that element, ignoring case:
        for( itV = attrVals.begin(); itV != attrVals.end(); ++itV )
        {
          if( itV.key().lower() == wantedAttribute )
            return( itV.data() );
        }
        // No match here: keep looking, another key may differ only in case.
      }
    }
  }
  else if( m_attributevaluesList.contains(element) )
  {
    const TQMap<TQString,TQStringList> &attrVals = m_attributevaluesList[element];
    if( attrVals.contains(attribute) )
      return attrVals[attribute];
  }

  // no predefined values available:
  return TQStringList();
}

/**
 * Iterate through the XML to get a mapping of all entity names and their expanded
 * version, e.g. nbsp => &#160;. Parameter entities are ignored.
 *
 * @param doc      the parsed meta DTD
 * @param progress progress dialog, advanced by one step per <entity> node
 * @return false if the user cancelled the progress dialog, true otherwise
 */
bool PseudoDTD::parseEntities( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_entityList.clear();
  TQDomNodeList list = doc->elementsByTagName( "entity" );
  uint listLength = list.count();

  for( uint i = 0; i < listLength; i++ )
  {
    if( progress->wasCancelled() )
      return false;

    progress->setProgress( progress->progress()+1 );
    //FIXME!!
    //tqApp->processEvents();

    TQDomNode node = list.item(i);
    TQDomElement elem = node.toElement();
    if( !elem.isNull()
        && elem.attribute( "type" ) != "param" )
    { // TODO: what's cdata <-> gen ?
      TQDomNodeList expandedList = elem.elementsByTagName( "text-expanded" );
      TQDomNode expandedNode = expandedList.item(0);
      TQDomElement expandedElem = expandedNode.toElement();
      if( ! expandedElem.isNull() )
      {
        TQString exp = expandedElem.text();
        // TODO: support more than one &#...; in the expanded text
        /* TODO include do this when the unicode font problem is solved:
        if( exp.contains(TQRegExp("^&#x[a-zA-Z0-9]+;$")) ) {
          // hexadecimal numbers, e.g. "&#x236;"
          uint end = exp.find( ";" );
          exp = exp.mid( 3, end-3 );
          exp = TQChar();
        } else if( exp.contains(TQRegExp("^&#[0-9]+;$")) ) {
          // decimal numbers, e.g. "&#236;"
          uint end = exp.find( ";" );
          exp = exp.mid( 2, end-2 );
          exp = TQChar( exp.toInt() );
        }
        */
        m_entityList.insert( elem.attribute("name"), exp );
      }
      else
      {
        // No expanded text available: store the name with an empty expansion.
        m_entityList.insert( elem.attribute("name"), TQString() );
      }
    }
  }

  return true;
}

/**
 * Get a list of all ( non-parameter ) entities that start with a certain string.
 *
 * @param start prefix the entity name has to start with; an empty string
 *              matches every entity
 * @return the names of all matching entities
 */
TQStringList PseudoDTD::entities( TQString start )
{
  TQStringList entities;
  TQMap<TQString,TQString>::Iterator it;
  for( it = m_entityList.begin(); it != m_entityList.end(); ++it )
  {
    // Compare against the entity name (the key). Dereferencing the iterator
    // would yield the expanded text instead, which is not what is listed.
    if( it.key().startsWith(start) )
    {
      TQString str = it.key();
      /* TODO: show entities as unicode character
      if( !it.data().isEmpty() ) {
        //str += " -- " + it.data();
        TQRegExp re( "&#(\\d+);" );
        if( re.search(it.data()) != -1 ) {
          uint ch = re.cap( 1).toUInt();
          str += " -- " + TQChar( ch).decomposition();
        }
        //kdDebug() << "#" << it.data() << endl;
      }
      */
      entities.append( str );
      // TODO: later use a table view
    }
  }

  return entities;
}

// kate: space-indent on; indent-width 2; replace-tabs on; mixed-indent off;