/* This file is part of the KDE project
 * Copyright (C) 2001, 2002 Rolf Magnus
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 * $Id$
 */

#include "tdefile_html.h"
#include "tdefile_html.moc"
#include
#include
#include
#include
#include
#include
#include
#include
#include

typedef KGenericFactory HtmlFactory;

K_EXPORT_COMPONENT_FACTORY( tdefile_html, HtmlFactory( "tdefile_html" ) )

/**
 * Constructs the HTML file-info plugin and declares the metadata layout
 * it provides for the "text/html" mime type.
 *
 * Two groups are registered:
 *  - "General": the fixed items "Doctype" (String), "Javascript" (Bool)
 *    and "Title" (String); the title item is given the
 *    KFileMimeTypeInfo::Name hint.
 *  - "Metatags": no fixed items; addVariableInfo() is called on it with
 *    TQVariant::String.
 *
 * Every group and item label is passed through i18n().
 *
 * @param parent forwarded unchanged to the KFilePlugin constructor
 * @param name   forwarded unchanged to the KFilePlugin constructor
 * @param args   forwarded unchanged to the KFilePlugin constructor
 */
KHtmlPlugin::KHtmlPlugin( TQObject *parent, const char *name,
                          const TQStringList &args )
    : KFilePlugin( parent, name, args )
{
    kdDebug(7034) << "html plugin\n";

    KFileMimeTypeInfo* htmlInfo = addMimeTypeInfo("text/html");

    // "General" group and its three fixed items.
    KFileMimeTypeInfo::GroupInfo* generalGroup =
        addGroupInfo(htmlInfo, "General", i18n("General"));
    addItemInfo(generalGroup, "Doctype", i18n("Document Type"), TQVariant::String);
    addItemInfo(generalGroup, "Javascript", i18n("JavaScript"), TQVariant::Bool);

    // The title is the only item that receives a hint.
    KFileMimeTypeInfo::ItemInfo* titleItem =
        addItemInfo(generalGroup, "Title", i18n("Title"), TQVariant::String);
    setHint(titleItem, KFileMimeTypeInfo::Name);

    // "Metatags" group: items are not declared up front here.
    KFileMimeTypeInfo::GroupInfo* metaGroup =
        addGroupInfo(htmlInfo, "Metatags", i18n("Meta Tags"));
    addVariableInfo(metaGroup, TQVariant::String, 0);
}

bool KHtmlPlugin::readInfo( KFileMetaInfo& info, uint ) { if ( info.path().isEmpty() ) // remote file return false; TQFile f(info.path()); if (!f.open(IO_ReadOnly)) return false; // we're only interested in the header, so just read until before // or until if the author forgot it // In
this case, it's better to limit the size of the buffer to something // sensible. Think a 0-filled 3GB file with an .html extension. int maxBufSize = TQMIN(f.size(), 32768); TQByteArray data(maxBufSize + 1); f.readBlock(data.data(), maxBufSize); data[maxBufSize]='\0'; TQString s(data); int start=0, last=0; TQRegExp exp; exp.setCaseSensitive(false); exp.setMinimal(true); KFileMetaInfoGroup group = appendGroup(info, "General"); exp.setPattern("\\s*<\\s*!doctype\\s*([^>]*)\\s*>"); if (exp.search(s, last) != -1) { kdDebug(7034) << "DocType: " << TQString(exp.capturedTexts().join("-")) << endl; appendItem(group, "Doctype", exp.cap(1)); last += exp.matchedLength(); } TQString title; exp.setPattern("<\\s*title\\s*>\\s*(.*)\\s*<\\s*/\\s*title\\s*>"); if (exp.search(s, last) != -1) { title = exp.cap(1); last += exp.matchedLength(); } KFileMetaInfoGroup metatags = appendGroup(info, "Metatags"); TQString meta, name, content; exp.setPattern("<\\s*meta\\s*([^>]*)\\s*>"); TQRegExp rxName("(?:name|http-equiv)\\s*=\\s*\"([^\"]+)\"", false); TQRegExp rxContent("content\\s*=\\s*\"([^\"]+)\"", false); TQRegExp rxCharset("charset\\s*=\\s*(.*)", false); TQTextCodec *codec = 0; // find the meta tags last = 0; while (1) { if ((start=exp.search(s, last)) == -1) break; meta = exp.cap(1); last = start+exp.matchedLength(); kdDebug(7034) << "Found Meta: " << meta << endl; if (rxName.search(meta) == -1) continue; name = rxName.cap(1); if (rxContent.search(meta) == -1) continue; content = rxContent.cap(1); appendItem(metatags, name, content.left(50)); // check if it has a charset defined if ( rxCharset.search(content) != -1 ) { kdDebug(7034) << "CodecForName : " << rxCharset.cap(1) << endl; codec = TQTextCodec::codecForName(rxCharset.cap(1).ascii()); } } if ( ! title.isEmpty() ) { if ( codec ) { title = codec->toUnicode(title.ascii()); kdDebug(7034) << "Codec : " << codec->name() << endl; } appendItem(group, "Title", title); } // find out if it contains javascript exp.setPattern("