diff options
Diffstat (limited to 'akregator/src/librss')
27 files changed, 5264 insertions, 0 deletions
diff --git a/akregator/src/librss/COPYING b/akregator/src/librss/COPYING new file mode 100644 index 000000000..cca2a5c9a --- /dev/null +++ b/akregator/src/librss/COPYING @@ -0,0 +1,20 @@ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/akregator/src/librss/Makefile.am b/akregator/src/librss/Makefile.am new file mode 100644 index 000000000..060d393f9 --- /dev/null +++ b/akregator/src/librss/Makefile.am @@ -0,0 +1,22 @@ +INCLUDES = \ + -I$(top_srcdir)/src \ + $(all_includes) + +noinst_LTLIBRARIES = \ + librsslocal.la + +noinst_HEADERS = article.h document.h global.h image.h textinput.h \ + loader.h librss.h enclosure.h + +librsslocal_la_SOURCES = article.cpp document.cpp image.cpp textinput.cpp \ + tools_p.cpp loader.cpp enclosure.cpp category.cpp feeddetector.cpp + +librsslocal_la_METASOURCES = AUTO + +check_PROGRAMS = testlibrss +testlibrss_SOURCES = testlibrss.cpp +testlibrss_LDFLAGS = $(all_libraries) +testlibrss_LDADD = librsslocal.la $(LIB_KIO) + +DOXYGEN_REFERENCES = kdeui +include $(top_srcdir)/admin/Doxyfile.am diff --git a/akregator/src/librss/README b/akregator/src/librss/README new file mode 100644 index 000000000..77d945015 --- /dev/null +++ b/akregator/src/librss/README @@ -0,0 +1,6 @@ +This is NOT the original librss by Frerich Raabe, though it is based on it. + +This version is supposed to be called libsyndication, but it has not been renamed yet, to relieve the packaging burden a bit +(honestly, we just haven't gotten to it yet). + +Please DO NOT report any bugs in it to Frerich, since he most probably did not introduce the bugs you found. diff --git a/akregator/src/librss/article.cpp b/akregator/src/librss/article.cpp new file mode 100644 index 000000000..010cf5dcb --- /dev/null +++ b/akregator/src/librss/article.cpp @@ -0,0 +1,290 @@ +/* + * article.cpp + * + * Copyright (c) 2001, 2002, 2003, 2004 Frerich Raabe <raabe@kde.org> + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the + * accompanying file 'COPYING'. 
+ */ +#include "article.h" +#include "tools_p.h" +#include "enclosure.h" +#include "category.h" + +#include <kdebug.h> +#include <krfcdate.h> +#include <kurl.h> +#include <kurllabel.h> +#include <kmdcodec.h> + +#include <qdatetime.h> +#include <qdom.h> + +using namespace RSS; +namespace RSS +{ + KMD5 md5Machine; +} + +struct Article::Private : public Shared +{ + QString title; + KURL link; + QString description; + QDateTime pubDate; + QString guid; + QString author; + bool guidIsPermaLink; + MetaInfoMap meta; + KURL commentsLink; + int numComments; + Enclosure enclosure; + QValueList<Category> categories; +}; + +Article::Article() : d(new Private) +{ +} + +Article::Article(const Article &other) : d(0) +{ + *this = other; +} + +Enclosure Article::enclosure() const +{ + return d->enclosure; +} + +QValueList<Category> Article::categories() const +{ + return d->categories; +} + + +Article::Article(const QDomNode &node, Format format, Version version) : d(new Private) +{ + QString elemText; + + d->numComments=0; + + if (!(elemText = extractTitle(node)).isNull()) + d->title = elemText; + + if (format==AtomFeed) + { + QDomNode n; + for (n = node.firstChild(); !n.isNull(); n = n.nextSibling()) { + const QDomElement e = n.toElement(); + if ( (e.tagName()==QString::fromLatin1("link")) && + (e.attribute(QString::fromLatin1("rel"), QString::fromLatin1("alternate")) == QString::fromLatin1("alternate"))) + { + d->link=n.toElement().attribute(QString::fromLatin1("href")); + break; + } + } + } + else + { + if (!(elemText = extractNode(node, QString::fromLatin1("link"))).isNull()) + d->link = elemText; + } + + + // prefer content/content:encoded over summary/description for feeds that provide it + QString tagName=(format==AtomFeed)? 
QString::fromLatin1("content"): QString::fromLatin1("content:encoded"); + + if (!(elemText = extractNode(node, tagName, false)).isNull()) + d->description = elemText; + + if (d->description.isEmpty()) + { + if (!(elemText = extractNode(node, QString::fromLatin1("body"), false)).isNull()) + d->description = elemText; + + if (d->description.isEmpty()) // 3rd try: see http://www.intertwingly.net/blog/1299.html + { + if (!(elemText = extractNode(node, QString::fromLatin1((format==AtomFeed)? "summary" : "description"), false)).isNull()) + d->description = elemText; + } + } + + time_t time = 0; + + if (format == AtomFeed) + { + if (version == vAtom_1_0) + elemText = extractNode(node, QString::fromLatin1("updated")); + else + elemText = extractNode(node, QString::fromLatin1("issued")); + + if (!elemText.isNull()) + time = parseISO8601Date(elemText); + } + else + { + elemText = extractNode(node, QString::fromLatin1("pubDate")); + if (!elemText.isNull()) + time = KRFCDate::parseDate(elemText); + } + + if (!(elemText = extractNode(node, QString::fromLatin1("dc:date"))).isNull()) + { + time = parseISO8601Date(elemText); + } + + // 0 means invalid, not epoch (parsers return epoch+1 when parsing epoch, see the KRFCDate::parseDate() docs) + if (time != 0) + d->pubDate.setTime_t(time); + + if (!(elemText = extractNode(node, QString::fromLatin1("wfw:comment"))).isNull()) { + d->commentsLink = elemText; + } + + if (!(elemText = extractNode(node, QString::fromLatin1("slash:comments"))).isNull()) { + d->numComments = elemText.toInt(); + } + + QDomElement element = QDomNode(node).toElement(); + + // in RSS 1.0, we use <item about> attribute as ID + // FIXME: pass format version instead of checking for attribute + + if (!element.isNull() && element.hasAttribute(QString::fromLatin1("rdf:about"))) + { + d->guid = element.attribute(QString::fromLatin1("rdf:about")); // HACK: using ns properly did not work + d->guidIsPermaLink = false; + } + else + { + tagName=(format==AtomFeed)? 
QString::fromLatin1("id"): QString::fromLatin1("guid"); + QDomNode n = node.namedItem(tagName); + if (!n.isNull()) + { + d->guidIsPermaLink = (format==AtomFeed)? false : true; + if (n.toElement().attribute(QString::fromLatin1("isPermaLink"), "true") == "false") d->guidIsPermaLink = false; + if (!(elemText = extractNode(node, tagName)).isNull()) + d->guid = elemText; + } + } + + if(d->guid.isEmpty()) { + d->guidIsPermaLink = false; + + md5Machine.reset(); + QDomNode n(node); + md5Machine.update(d->title.utf8()); + md5Machine.update(d->description.utf8()); + d->guid = QString(md5Machine.hexDigest().data()); + d->meta[QString::fromLatin1("guidIsHash")] = QString::fromLatin1("true"); + } + + QDomNode enclosure = element.namedItem(QString::fromLatin1("enclosure")); + if (enclosure.isElement()) + d->enclosure = Enclosure::fromXML(enclosure.toElement()); + + d->author = parseItemAuthor(element, format, version); + + for (QDomNode i = node.firstChild(); !i.isNull(); i = i.nextSibling()) + { + if (i.isElement()) + { + if (i.toElement().tagName() == QString::fromLatin1("metaInfo:meta")) + { + QString type = i.toElement().attribute(QString::fromLatin1("type")); + d->meta[type] = i.toElement().text(); + } + else if (i.toElement().tagName() == QString::fromLatin1("category")) + { + d->categories.append(Category::fromXML(i.toElement())); + } + } + } +} + +Article::~Article() +{ + if (d->deref()) + delete d; +} + +QString Article::title() const +{ + return d->title; +} + +QString Article::author() const +{ + return d->author; +} + +const KURL &Article::link() const +{ + return d->link; +} + +QString Article::description() const +{ + return d->description; +} + +QString Article::guid() const +{ + return d->guid; +} + +bool Article::guidIsPermaLink() const +{ + return d->guidIsPermaLink; +} + +const QDateTime &Article::pubDate() const +{ + return d->pubDate; +} + +const KURL &Article::commentsLink() const +{ + return d->commentsLink; +} + +int Article::comments() const +{ + return 
d->numComments; +} + + +QString Article::meta(const QString &key) const +{ + return d->meta[key]; +} + +KURLLabel *Article::widget(QWidget *parent, const char *name) const +{ + KURLLabel *label = new KURLLabel(d->link.url(), d->title, parent, name); + label->setUseTips(true); + if (!d->description.isNull()) + label->setTipText(d->description); + + return label; +} + +Article &Article::operator=(const Article &other) +{ + if (this != &other) { + other.d->ref(); + if (d && d->deref()) + delete d; + d = other.d; + } + return *this; +} + +bool Article::operator==(const Article &other) const +{ + return d->guid == other.guid(); +} + +// vim:noet:ts=4 diff --git a/akregator/src/librss/article.h b/akregator/src/librss/article.h new file mode 100644 index 000000000..74deb2539 --- /dev/null +++ b/akregator/src/librss/article.h @@ -0,0 +1,172 @@ +/* + * article.h + * + * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org> + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the + * accompanying file 'COPYING'. + */ +#ifndef LIBRSS_ARTICLE_H +#define LIBRSS_ARTICLE_H + +#include <qmap.h> + +#include "global.h" + +class QDateTime; +class QDomNode; +template <class> class QValueList; +class QString; +class QWidget; +class KURL; +class KURLLabel; + +namespace RSS +{ + class Category; + class Enclosure; + + /** + * Represents an article as stored in a RSS file. You don't have to + * instantiate one of these yourself, the common way to access instances + * is via Document::articles(). + * @see Document::articles() + */ + class KDE_EXPORT Article + { + public: + /** + * A list of articles. + */ + typedef QValueList<Article> List; + + /** + * Default constructor. + */ + Article(); + + /** + * Copy constructor. + * @param other The Article object to copy. 
+ */ + Article(const Article &other); + + /** + * Constructs an Article from a piece of RSS markup. + * @param node A QDomNode which references the DOM leaf to be used + * for constructing the Article. + */ + Article(const QDomNode &node, Format format, Version version); + + /** + * Assignment operator. + * @param other The Article object to clone. + * @return A reference to the cloned Article object. + */ + Article &operator=(const Article &other); + + /** + * Compares two articles. Two articles are treated to be identical + * if all their properties (title, link, description etc.) are + * equal. + * @param other The article this article should be compared with. + * @return Whether the two articles are equal. + */ + bool operator==(const Article &other) const; + + /** + * Convenience method. Simply calls !operator==(). + * @param other The article this article should be compared with. + * @return Whether the two articles are unequal. + */ + bool operator!=(const Article &other) const { return !operator==(other); } + + /** + * Destructor. + */ + virtual ~Article(); + + /** + * RSS 0.90 and upwards + * @return The headline of this article, or QString::null if + * no headline was available. + */ + QString title() const; + + /** + * RSS 0.90 and upwards + * @return A URL referencing the complete text for this article, + * or an empty KURL if no link was available. + * Note that the RSS 0.91 Specification dictates that URLs not + * starting with "http://" or "ftp://" are considered invalid. + */ + const KURL &link() const; + + /** + * RSS 0.91 and upwards + * @return A story synopsis, or QString::null if no description + * was available. + */ + QString description() const; + + /** + * a string desribing the author of the item. + */ + QString author() const; + + /** + * RSS 2.0 and upwards + * @return An article GUID (globally unique identifier). + */ + QString guid() const; + + /** + * RSS 2.0 and upwards + * @return If this article GUID is permalink. 
Has no meaning when guid() is QString::null. + */ + bool guidIsPermaLink() const; + + /** + * RSS 2.0 and upwards + * @return The date when the article was published. + */ + const QDateTime &pubDate() const; + + const KURL &commentsLink() const; + int comments() const; + + Enclosure enclosure() const; + + /** returns a list of categories this article is assigned to. (RSS2 only, Atom is not supported yet) */ + QValueList<Category> categories() const; + + QString meta(const QString &key) const; + + /** + * @param parent The parent widget for the KURLLabel. + * @param name A name for the widget which will be used internally. + * @return a widget (a KURLLabel in this case) for the Article. + * This makes building a user-interface which contains the + * information in this Article object more convenient. + * The returned KURLLabel's caption will be the title(), clicking + * on it will emit the URL link(), and it has a QToolTip attached + * to it which displays the description() (in case it has one, + * if there is no description, the URL which the label links to + * will be used). + * Note that you have to delete the KURLLabel object returned by + * this method yourself. + */ + KURLLabel *widget(QWidget *parent = 0, const char *name = 0) const; + + typedef QMap<QString, QString> MetaInfoMap; + + private: + struct Private; + Private *d; + }; +} + +#endif // LIBRSS_ARTICLE_H +// vim: noet:ts=4 diff --git a/akregator/src/librss/category.cpp b/akregator/src/librss/category.cpp new file mode 100644 index 000000000..07508ecaf --- /dev/null +++ b/akregator/src/librss/category.cpp @@ -0,0 +1,129 @@ +/* + This file is part of Akregator. + + Copyright (C) 2005 Frank Osterfeld <frank.osterfeld at kdemail.net> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, permission is given to link this program + with any edition of Qt, and distribute the resulting executable, + without including the source code for Qt in the source distribution. +*/ + +#include "category.h" +#include "tools_p.h" + +#include <qdom.h> +#include <qstring.h> + +class QString; + +namespace RSS +{ + +class Category::CategoryPrivate : public Shared +{ + public: + bool isNull; + QString category; + QString domain; + + bool operator==(const CategoryPrivate &other) const + { + return (isNull && other.isNull) || (category == other.category && domain == other.domain); + } + + static CategoryPrivate* copyOnWrite(CategoryPrivate* ep) + { + if (ep->count > 1) + { + ep->deref(); + ep = new CategoryPrivate(*ep); + } + return ep; + } +}; + +bool Category::isNull() const +{ + return d == 0; +} + +Category Category::fromXML(const QDomElement& e) +{ + Category obj; + if (e.hasAttribute(QString::fromLatin1("domain"))) + obj.d->domain = e.attribute(QString::fromLatin1("domain")); + obj.d->category = e.text(); + obj.d->isNull = false; + return obj; +} + +Category::Category() : d(new CategoryPrivate) +{ + d->isNull = true; +} + +Category::Category(const Category& other) : d(0) +{ + *this = other; +} + +Category::Category(const QString& category, const QString& domain) : d(new CategoryPrivate) +{ + d->isNull = false; + d->category = category; + d->domain = domain; +} + +Category::~Category() +{ + if (d->deref()) + { + delete d; + d = 0; + } +} + +Category& Category::operator=(const Category& other) 
+{ + if (d != other.d) + { + other.d->ref(); + if (d && d->deref()) + delete d; + d = other.d; + } + return *this; +} + +bool Category::operator==(const Category &other) const +{ + return *d == *other.d; +} + +QString Category::category() const +{ + return !d->isNull ? d->category : QString::null; +} + +QString Category::domain() const +{ + return !d->isNull ? d->domain : QString::null; +} + +} // namespace RSS + + diff --git a/akregator/src/librss/category.h b/akregator/src/librss/category.h new file mode 100644 index 000000000..43267b903 --- /dev/null +++ b/akregator/src/librss/category.h @@ -0,0 +1,64 @@ +/* + This file is part of Akregator. + + Copyright (C) 2005 Frank Osterfeld <frank.osterfeld at kdemail.net> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, permission is given to link this program + with any edition of Qt, and distribute the resulting executable, + without including the source code for Qt in the source distribution. 
+*/
+
+#ifndef LIBRSS_RSS_CATEGORY_H
+#define LIBRSS_RSS_CATEGORY_H
+
+#include "global.h"
+
+class QDomDocument;
+class QDomElement;
+class QString;
+
+namespace RSS
+{
+    /**
+     * A category assigned to a feed item: a category name plus an
+     * optional domain. Copies share one private data object.
+     */
+    class KDE_EXPORT Category
+    {
+        public:
+
+        /**
+         * Builds a category from a DOM element: the element text is used
+         * as the category name and the "domain" attribute, if present, as
+         * the domain.
+         */
+        static Category fromXML(const QDomElement& e);
+
+        /** Creates a null category. */
+        Category();
+        /** Creates a category sharing the data of @p other. */
+        Category(const Category& other);
+        /** Creates a category with the given name and domain. */
+        Category(const QString& category, const QString& domain);
+        virtual ~Category();
+
+        /** Makes this category share the data of @p other. */
+        Category& operator=(const Category& other);
+        /**
+         * @return true if both categories are null, or if their category
+         * names and domains are equal.
+         */
+        bool operator==(const Category& other) const;
+
+        /** @return the category name, or QString::null for a null category. */
+        QString category() const;
+
+        /** @return the domain, or QString::null for a null category. */
+        QString domain() const;
+
+        /** Reports whether this object is a null category (see Category()). */
+        bool isNull() const;
+
+        private:
+
+        class CategoryPrivate;
+        CategoryPrivate* d;
+
+    };
+} // namespace RSS
+
+#endif // LIBRSS_RSS_CATEGORY_H
diff --git a/akregator/src/librss/document.cpp b/akregator/src/librss/document.cpp
new file mode 100644
index 000000000..ad9614f98
--- /dev/null
+++ b/akregator/src/librss/document.cpp
@@ -0,0 +1,653 @@
+/*
+ * document.cpp
+ *
+ * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org>
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the
+ * accompanying file 'COPYING'.
+ * + */ +#include "document.h" +#include "article.h" +#include "image.h" +#include "textinput.h" +#include "tools_p.h" + +#include <krfcdate.h> +#include <kurl.h> + +#include <qdatetime.h> +#include <qdom.h> +#include <qptrlist.h> + +#include <kdebug.h> + +using namespace RSS; + +struct Document::Private : public Shared +{ + Private() : version(v0_90), image(NULL), textInput(NULL), language(en) + { + format=UnknownFormat; + valid=false; + ttl=-1; + } + + ~Private() + { + delete textInput; + delete image; + } + + Version version; + QString title; + QString description; + KURL link; + Image *image; + TextInput *textInput; + Article::List articles; + Language language; + Format format; + QString copyright; + QDateTime pubDate; + QDateTime lastBuildDate; + QString rating; + KURL docs; + int ttl; + QString managingEditor; + QString webMaster; + HourList skipHours; + DayList skipDays; + bool valid; +}; + +Document::Document() : d(new Private) +{ +} + +Document::Document(const Document &other) : d(0) +{ + *this = other; +} + +static QString extractLink(const QDomNode& node, Format format) +{ + if (format == AtomFeed) + { + QDomNode n; + for (n = node.firstChild(); !n.isNull(); n = n.nextSibling()) { + const QDomElement e = n.toElement(); + if ( (e.tagName() == QString::fromLatin1("link")) + && (e.attribute(QString::fromLatin1("rel"), QString::fromLatin1("alternate")) == QString::fromLatin1("alternate"))) + { + return n.toElement().attribute(QString::fromLatin1("href")); + } + } + } + + return extractNode(node, QString::fromLatin1("link")); + +} + +Document::Document(const QDomDocument &doc) : d(new Private) +{ + QString elemText; + QDomNode rootNode = doc.documentElement(); + + // Determine the version of the present RSS markup. 
+ QString attr; + + // we should probably check that it ISN'T feed or rss, rather than check if it is xhtml + if (rootNode.toElement().tagName()==QString::fromLatin1("html")) + d->valid=false; + else + d->valid=true; + + attr = rootNode.toElement().attribute(QString::fromLatin1("version"), QString::null); + if (rootNode.toElement().tagName()==QString::fromLatin1("feed")) + { + d->format=AtomFeed; + if (attr == QString::fromLatin1("0.3")) + d->version = vAtom_0_3; + else if (attr == QString::fromLatin1("0.2")) /* smt -> review */ + d->version = vAtom_0_2; + else if (attr == QString::fromLatin1("0.1")) /* smt -> review */ + d->version = vAtom_0_1; + else + d->version = vAtom_1_0; + } + else + { + d->format=RSSFeed; + if (attr == QString::fromLatin1("0.91")) + d->version = v0_91; + else if (attr == QString::fromLatin1("0.92")) + d->version = v0_92; + else if (attr == QString::fromLatin1("0.93")) + d->version = v0_93; + else if (attr == QString::fromLatin1("0.94")) + d->version = v0_94; + else // otherwise, we just assume a RSS2 compatible feed. As rss2 is generally + // backward-compatible, this should work + d->version = v2_0; + } + + + if (d->format==UnknownFormat) + { + attr = rootNode.toElement().attribute(QString::fromLatin1("xmlns"), QString::null); + if (!attr.isNull()) { + /* + * Hardcoding these URLs is actually a bad idea, since the DTD doesn't + * dictate a specific namespace. Still, most RSS files seem to use + * these two, so I'll go for them now. If it turns out that many + * mirrors of this RSS namespace are in use, I'll probably have to + * distinguish the RSS versions by analyzing the relationship between + * the nodes. 
+ */ + if (attr == QString::fromLatin1("http://my.netscape.com/rdf/simple/0.9/")) { + d->format=RSSFeed; + d->version = v0_90; + } + else if (attr == QString::fromLatin1("http://purl.org/rss/1.0/")) { + d->format=RSSFeed; + d->version = v1_0; + } + } + } + + QDomNode channelNode; + + if (d->format == AtomFeed) + channelNode=rootNode; + else + channelNode=rootNode.namedItem(QString::fromLatin1("channel")); + + if (!(elemText = extractTitle(channelNode)).isNull()) + d->title = elemText; + QString descriptionTagName = "description"; + + if (d->format == AtomFeed) + { + if (d->version == vAtom_1_0) + descriptionTagName = "subtitle"; + else + descriptionTagName = "tagline"; + } + + if (!(elemText = extractNode(channelNode, descriptionTagName)).isNull()) + d->description = elemText; + + d->link = extractLink(channelNode, d->format); + + + /* This is ugly but necessary since RSS 0.90 and 1.0 have a different parent + * node for <image>, <textinput> and <item> than RSS 0.91-0.94 and RSS 2.0. + */ + QDomNode parentNode; + if (d->version == v0_90 || d->version == v1_0 || d->format == AtomFeed) + parentNode = rootNode; + else + { + // following is a HACK for broken 0.91 feeds like xanga.com's + if (!rootNode.namedItem(QString::fromLatin1("item")).isNull()) + parentNode = rootNode; + else + parentNode = channelNode; + } + + // image and textinput aren't supported by Atom.. 
handle in case feed provides + QDomNode n = parentNode.namedItem(QString::fromLatin1("image")); + if (!n.isNull()) + d->image = new Image(n); + + n = parentNode.namedItem(QString::fromLatin1("textinput")); + if (!n.isNull()) + d->textInput = new TextInput(n); + + // Our (hopefully faster) version of elementsByTagName() + QString tagName; + if (d->format == AtomFeed) + tagName=QString::fromLatin1("entry"); + else + tagName=QString::fromLatin1("item"); + + for (n = parentNode.firstChild(); !n.isNull(); n = n.nextSibling()) { + const QDomElement e = n.toElement(); + if (e.tagName() == tagName) + d->articles.append(Article(e, d->format, d->version)); + } + + if (!(elemText = extractNode(channelNode, QString::fromLatin1("copyright"))).isNull()) + d->copyright = elemText; + + if (d->format == AtomFeed) + elemText = rootNode.toElement().attribute(QString::fromLatin1("xml:lang"), QString::null); + else + elemText = extractNode(channelNode, QString::fromLatin1("language")); + + if (!elemText.isNull()){ + if (elemText == QString::fromLatin1("af")) + d->language = af; + else if (elemText == QString::fromLatin1("sq")) + d->language = sq; + else if (elemText == QString::fromLatin1("eu")) + d->language = eu; + else if (elemText == QString::fromLatin1("be")) + d->language = be; + else if (elemText == QString::fromLatin1("bg")) + d->language = bg; + else if (elemText == QString::fromLatin1("ca")) + d->language = ca; + else if (elemText == QString::fromLatin1("zh-cn")) + d->language = zh_cn; + else if (elemText == QString::fromLatin1("zh-tw")) + d->language = zh_tw; + else if (elemText == QString::fromLatin1("hr")) + d->language = hr; + else if (elemText == QString::fromLatin1("cs")) + d->language = cs; + else if (elemText == QString::fromLatin1("da")) + d->language = da; + else if (elemText == QString::fromLatin1("nl")) + d->language = nl; + else if (elemText == QString::fromLatin1("nl-be")) + d->language = nl_be; + else if (elemText == QString::fromLatin1("nl-nl")) + d->language 
= nl_nl; + else if (elemText == QString::fromLatin1("en")) + d->language = en; + else if (elemText == QString::fromLatin1("en-au")) + d->language = en_au; + else if (elemText == QString::fromLatin1("en-bz")) + d->language = en_bz; + else if (elemText == QString::fromLatin1("en-ca")) + d->language = en_ca; + else if (elemText == QString::fromLatin1("en-ie")) + d->language = en_ie; + else if (elemText == QString::fromLatin1("en-jm")) + d->language = en_jm; + else if (elemText == QString::fromLatin1("en-nz")) + d->language = en_nz; + else if (elemText == QString::fromLatin1("en-ph")) + d->language = en_ph; + else if (elemText == QString::fromLatin1("en-za")) + d->language = en_za; + else if (elemText == QString::fromLatin1("en-tt")) + d->language = en_tt; + else if (elemText == QString::fromLatin1("en-gb")) + d->language = en_gb; + else if (elemText == QString::fromLatin1("en-us")) + d->language = en_us; + else if (elemText == QString::fromLatin1("en-zw")) + d->language = en_zw; + else if (elemText == QString::fromLatin1("fo")) + d->language = fo; + else if (elemText == QString::fromLatin1("fi")) + d->language = fi; + else if (elemText == QString::fromLatin1("fr")) + d->language = fr; + else if (elemText == QString::fromLatin1("fr-be")) + d->language = fr_be; + else if (elemText == QString::fromLatin1("fr-ca")) + d->language = fr_ca; + else if (elemText == QString::fromLatin1("fr-fr")) + d->language = fr_fr; + else if (elemText == QString::fromLatin1("fr-lu")) + d->language = fr_lu; + else if (elemText == QString::fromLatin1("fr-mc")) + d->language = fr_mc; + else if (elemText == QString::fromLatin1("fr-ch")) + d->language = fr_ch; + else if (elemText == QString::fromLatin1("gl")) + d->language = gl; + else if (elemText == QString::fromLatin1("gd")) + d->language = gd; + else if (elemText == QString::fromLatin1("de")) + d->language = de; + else if (elemText == QString::fromLatin1("de-at")) + d->language = de_at; + else if (elemText == QString::fromLatin1("de-de")) + 
d->language = de_de; + else if (elemText == QString::fromLatin1("de-li")) + d->language = de_li; + else if (elemText == QString::fromLatin1("de-lu")) + d->language = de_lu; + else if (elemText == QString::fromLatin1("de-ch")) + d->language = de_ch; + else if (elemText == QString::fromLatin1("el")) + d->language = el; + else if (elemText == QString::fromLatin1("hu")) + d->language = hu; + else if (elemText == QString::fromLatin1("is")) + d->language = is; + else if (elemText == QString::fromLatin1("id")) + d->language = id; + else if (elemText == QString::fromLatin1("ga")) + d->language = ga; + else if (elemText == QString::fromLatin1("it")) + d->language = it; + else if (elemText == QString::fromLatin1("it-it")) + d->language = it_it; + else if (elemText == QString::fromLatin1("it-ch")) + d->language = it_ch; + else if (elemText == QString::fromLatin1("ja")) + d->language = ja; + else if (elemText == QString::fromLatin1("ko")) + d->language = ko; + else if (elemText == QString::fromLatin1("mk")) + d->language = mk; + else if (elemText == QString::fromLatin1("no")) + d->language = no; + else if (elemText == QString::fromLatin1("pl")) + d->language = pl; + else if (elemText == QString::fromLatin1("pt")) + d->language = pt; + else if (elemText == QString::fromLatin1("pt-br")) + d->language = pt_br; + else if (elemText == QString::fromLatin1("pt-pt")) + d->language = pt_pt; + else if (elemText == QString::fromLatin1("ro")) + d->language = ro; + else if (elemText == QString::fromLatin1("ro-mo")) + d->language = ro_mo; + else if (elemText == QString::fromLatin1("ro-ro")) + d->language = ro_ro; + else if (elemText == QString::fromLatin1("ru")) + d->language = ru; + else if (elemText == QString::fromLatin1("ru-mo")) + d->language = ru_mo; + else if (elemText == QString::fromLatin1("ru-ru")) + d->language = ru_ru; + else if (elemText == QString::fromLatin1("sr")) + d->language = sr; + else if (elemText == QString::fromLatin1("sk")) + d->language = sk; + else if (elemText == 
QString::fromLatin1("sl")) + d->language = sl; + else if (elemText == QString::fromLatin1("es")) + d->language = es; + else if (elemText == QString::fromLatin1("es-ar")) + d->language = es_ar; + else if (elemText == QString::fromLatin1("es-bo")) + d->language = es_bo; + else if (elemText == QString::fromLatin1("es-cl")) + d->language = es_cl; + else if (elemText == QString::fromLatin1("es-co")) + d->language = es_co; + else if (elemText == QString::fromLatin1("es-cr")) + d->language = es_cr; + else if (elemText == QString::fromLatin1("es-do")) + d->language = es_do; + else if (elemText == QString::fromLatin1("es-ec")) + d->language = es_ec; + else if (elemText == QString::fromLatin1("es-sv")) + d->language = es_sv; + else if (elemText == QString::fromLatin1("es-gt")) + d->language = es_gt; + else if (elemText == QString::fromLatin1("es-hn")) + d->language = es_hn; + else if (elemText == QString::fromLatin1("es-mx")) + d->language = es_mx; + else if (elemText == QString::fromLatin1("es-ni")) + d->language = es_ni; + else if (elemText == QString::fromLatin1("es-pa")) + d->language = es_pa; + else if (elemText == QString::fromLatin1("es-py")) + d->language = es_py; + else if (elemText == QString::fromLatin1("es-pe")) + d->language = es_pe; + else if (elemText == QString::fromLatin1("es-pr")) + d->language = es_pr; + else if (elemText == QString::fromLatin1("es-es")) + d->language = es_es; + else if (elemText == QString::fromLatin1("es-uy")) + d->language = es_uy; + else if (elemText == QString::fromLatin1("es-ve")) + d->language = es_ve; + else if (elemText == QString::fromLatin1("sv")) + d->language = sv; + else if (elemText == QString::fromLatin1("sv-fi")) + d->language = sv_fi; + else if (elemText == QString::fromLatin1("sv-se")) + d->language = sv_se; + else if (elemText == QString::fromLatin1("tr")) + d->language = tr; + else if (elemText == QString::fromLatin1("uk")) + d->language = uk; + else + d->language = UndefinedLanguage; + } + + if (d->format == AtomFeed) 
+ tagName=QString::fromLatin1("issued"); // atom doesn't specify this for feeds + // but some broken feeds do this + else + tagName=QString::fromLatin1("pubDate"); + + if (!(elemText = extractNode(channelNode, tagName)).isNull()) { + time_t _time; + + if (d->format == AtomFeed) + _time=parseISO8601Date(elemText); + else + _time=KRFCDate::parseDate(elemText); + /* \bug This isn't really the right way since it will set the date to + * Jan 1 1970, 1:00:00 if the passed date was invalid; this means that + * we cannot distinguish between that date, and invalid values. :-/ + */ + d->pubDate.setTime_t(_time); + } + + if (!(elemText = extractNode(channelNode, QString::fromLatin1("dc:date"))).isNull()) { + time_t _time = parseISO8601Date(elemText); + /* \bug This isn't really the right way since it will set the date to + * Jan 1 1970, 1:00:00 if the passed date was invalid; this means that + * we cannot distinguish between that date, and invalid values. :-/ + */ + d->pubDate.setTime_t(_time); + } + + if (d->format == AtomFeed) + tagName=QString::fromLatin1("modified"); + else + tagName=QString::fromLatin1("lastBuildDate"); + if (!(elemText = extractNode(channelNode, tagName)).isNull()) { + time_t _time; + if (d->format == AtomFeed) + _time = parseISO8601Date(elemText); + else + _time = KRFCDate::parseDate(elemText); + d->lastBuildDate.setTime_t(_time); + } + + if (!(elemText = extractNode(channelNode, QString::fromLatin1("rating"))).isNull()) + d->rating = elemText; + if (!(elemText = extractNode(channelNode, QString::fromLatin1("docs"))).isNull()) + d->docs = elemText; + if (!(elemText = extractNode(channelNode, QString::fromLatin1((d->format == AtomFeed) ? 
"author" : "managingEditor"))).isNull()) + d->managingEditor = elemText; + if (!(elemText = extractNode(channelNode, QString::fromLatin1("webMaster"))).isNull()) + d->webMaster = elemText; + + if (!(elemText = extractNode(channelNode, QString::fromLatin1("ttl"))).isNull()) + d->ttl = elemText.toUInt(); + + n = channelNode.namedItem(QString::fromLatin1("skipHours")); + if (!n.isNull()) + for (QDomElement e = n.firstChild().toElement(); !e.isNull(); e = e.nextSibling().toElement()) + if (e.tagName() == QString::fromLatin1("hour")) + d->skipHours.append(e.text().toUInt()); + + n = channelNode.namedItem(QString::fromLatin1("skipDays")); + if (!n.isNull()) { + Day day; + QString elemText; + for (QDomElement e = n.firstChild().toElement(); !e.isNull(); e = e.nextSibling().toElement()) + if (e.tagName() == QString::fromLatin1("day")) { + elemText = e.text().lower(); + if (elemText == QString::fromLatin1("monday")) + day = Monday; + else if (elemText == QString::fromLatin1("tuesday")) + day = Tuesday; + else if (elemText == QString::fromLatin1("wednesday")) + day = Wednesday; + else if (elemText == QString::fromLatin1("thursday")) + day = Thursday; + else if (elemText == QString::fromLatin1("friday")) + day = Friday; + else if (elemText == QString::fromLatin1("saturday")) + day = Saturday; + else if (elemText == QString::fromLatin1("sunday")) + day = Sunday; + else + day = UndefinedDay; + if (day != UndefinedDay) + d->skipDays.append(day); + } + } +} + +Document::~Document() +{ + if (d->deref()) + delete d; +} + +bool Document::isValid() const +{ + return d->valid; +} + +Version Document::version() const +{ + return d->version; +} + +QString Document::verbVersion() const +{ + switch (d->version) { + case v0_90: return QString::fromLatin1("0.90"); + case v0_91: return QString::fromLatin1("0.91"); + case v0_92: return QString::fromLatin1("0.92"); + case v0_93: return QString::fromLatin1("0.93"); + case v0_94: return QString::fromLatin1("0.94"); + case v1_0: return 
QString::fromLatin1("1.0"); + case v2_0: return QString::fromLatin1("2.0"); + case vAtom_0_3: return QString::fromLatin1("0.3"); + case vAtom_0_2: return QString::fromLatin1("0.2"); + case vAtom_0_1: return QString::fromLatin1("0.1"); + case vAtom_1_0: return QString::fromLatin1("1.0"); + } + return QString::null; +} + +QString Document::title() const +{ + return d->title; +} + +QString Document::description() const +{ + return d->description; +} + +const KURL &Document::link() const +{ + return d->link; +} + +Image *Document::image() +{ + return d->image; +} + +const Image *Document::image() const +{ + return d->image; +} + +TextInput *Document::textInput() +{ + return d->textInput; +} + +const TextInput *Document::textInput() const +{ + return d->textInput; +} + +const Article::List &Document::articles() const +{ + return d->articles; +} + +Language Document::language() const +{ + return d->language; +} + +QString Document::copyright() const +{ + return d->copyright; +} + +const QDateTime &Document::pubDate() const +{ + return d->pubDate; +} + +const QDateTime &Document::lastBuildDate() const +{ + return d->lastBuildDate; +} + +QString Document::rating() const +{ + return d->rating; +} + +const KURL &Document::docs() const +{ + return d->docs; +} + +QString Document::managingEditor() const +{ + return d->managingEditor; +} + +QString Document::webMaster() const +{ + return d->webMaster; +} + +const HourList &Document::skipHours() const +{ + return d->skipHours; +} + +const DayList &Document::skipDays() const +{ + return d->skipDays; +} + +int Document::ttl() const +{ + return d->ttl; +} + +Document &Document::operator=(const Document &other) +{ + if (this != &other) { + other.d->ref(); + if (d && d->deref()) + delete d; + d = other.d; + } + return *this; +} + +// vim:noet:ts=4 diff --git a/akregator/src/librss/document.h b/akregator/src/librss/document.h new file mode 100644 index 000000000..161960720 --- /dev/null +++ b/akregator/src/librss/document.h @@ -0,0 
+1,237 @@
/*
 * document.h
 *
 * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org>
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the
 * accompanying file 'COPYING'.
 */
#ifndef LIBRSS_DOCUMENT_H
#define LIBRSS_DOCUMENT_H

#include "article.h"
#include "global.h"

class QDateTime;
class QDomDocument;

namespace RSS
{
    class Image;
    class TextInput;

    /**
     * Represents a RSS document and provides all the features and properties
     * as stored in it. You usually don't need to instantiate this one yourself
     * but rather use Loader::loadFrom() to produce a Document object.
     *
     * Copies of a Document share one reference-counted private data object
     * (see the copy constructor and assignment operator).
     * @see Loader::loadFrom()
     */
    class KDE_EXPORT Document
    {
        public:
            /**
             * Default constructor.
             */
            Document();

            /**
             * Copy constructor.
             * @param other The Document object to copy.
             */
            Document(const Document &other);

            /**
             * Constructs a Document from a piece of XML markup.
             * @param doc The parsed XML document to read the feed from.
             */
            Document(const QDomDocument &doc);

            /**
             * Assignment operator.
             * @param other The Document object to clone.
             * @return A reference to the cloned Document object.
             */
            Document &operator=(const Document &other);

            /**
             * Destructor.
             */
            ~Document();

            /**
             * @return Whether the document is valid.
             */
            bool isValid() const;

            /**
             * @return The version of this document (one of the values of the
             * enum RSS::Version). This value can be used to determine which
             * features this RSS document provides.
             * @see verbVersion()
             */
            Version version() const;

            /**
             * Convenience method. Differs from version() only in how the result
             * is returned.
             * @return A QString representing the verbose version of the
             * document, e.g. "0.91" or "2.0". Note that the string carries
             * only the number, so RSS 1.0 and Atom 1.0 both yield "1.0".
             * @see version()
             */
            QString verbVersion() const;

            /**
             * RSS 0.90 and upwards
             * @return The title of the RSS document, or QString::null if no
             * title was available. This is often the name of the news source
             * from which the RSS document was retrieved.
             */
            QString title() const;

            /**
             * RSS 0.90 and upwards
             * @return The description of the RSS document, or QString::null
             * if no description was available. This is usually a short slogan
             * or description of the news source from which the RSS document
             * was retrieved.
             */
            QString description() const;

            /**
             * RSS 0.90 and upwards
             * @return A link pointing to some website, or an empty KURL if no
             * link was available. This URL mostly points to the homepage of
             * the news site from which the RSS document was retrieved.
             * Note that the RSS 0.91 Specification dictates that URLs not
             * starting with "http://" or "ftp://" are considered invalid.
             */
            const KURL &link() const;

            /**
             * RSS 0.90 and upwards
             * @return An Image object as stored in the RSS document, or a
             * null pointer if there was no image available.
             * @see Image
             */
            Image *image();

            /**
             * A version of the method above, with stricter const-ness.
             */
            const Image *image() const;

            /**
             * RSS 0.90 and upwards
             * @return A TextInput object as stored in the RSS document, or a
             * null pointer if there was no text input available.
             * @see TextInput
             */
            TextInput *textInput();

            /**
             * A version of the method above, with stricter const-ness.
             */
            const TextInput *textInput() const;

            /**
             * RSS 0.90 and upwards
             * @return A reference to the list of Article objects as stored in
             * the RSS document. The list is returned by reference, so it is
             * never a null pointer; presumably it is simply empty when no
             * articles were available (TODO confirm against the parser).
             * Every RSS DTD requires that there is at least one article
             * defined, so a document without articles indicates an invalid
             * RSS file!
             * @see Article
             */
            const Article::List &articles() const;

            /**
             * RSS 0.91 and upwards
             * @return The language used in the RSS document (for the article
             * headlines etc.). This was originally introduced to assist with
             * determining the correct page encoding, but it is purely
             * optional information in this library since you don't have to
             * care about the encoding as Unicode is used in the whole library.
             * @see RSS::Language
             */
            Language language() const;

            /**
             * RSS 0.91 and upwards
             * @return A copyright of the information contained in the RSS
             * document, or QString::null if no copyright is available.
             */
            QString copyright() const;

            /**
             * RSS 0.91 and upwards
             * @return The date when the RSS document was published.
             */
            const QDateTime &pubDate() const;

            /**
             * RSS 0.91 and upwards.
             * @return The last time the channel was modified.
             */
            const QDateTime &lastBuildDate() const;

            /**
             * RSS 0.91 and upwards
             * @return A <a href="http://www.w3.org/PICS/#Specs">PICS</a>
             * rating for this page.
             */
            QString rating() const;

            /**
             * RSS 0.91 and upwards
             * @return This tag should contain either a URL that references a
             * description of the channel, or a pointer to the documentation
             * for the format used in the RSS file.
             */
            const KURL &docs() const;

            /**
             * RSS 0.91 and upwards
             * @return The email address of the managing editor of the site,
             * the person to contact for editorial inquiries. The suggested
             * format for email addresses in RSS documents is
             * bull@mancuso.com (Bull Mancuso).
             * @see webMaster()
             */
            QString managingEditor() const;

            /**
             * RSS 0.91 and upwards
             * @return The email address of the webmaster for the site, the
             * person to contact if there are technical problems with the
             * channel, or QString::null if this information isn't available.
             * @see managingEditor()
             */
            QString webMaster() const;

            /**
             * RSS 0.91 and upwards
             * @return A list of hours indicating the hours in the day, GMT,
             * when the channel is unlikely to be updated. If this item is
             * omitted, the channel is assumed to be updated hourly. Each
             * hour should be an integer value between 0 and 23.
             * @see skipDays()
             */
            const HourList &skipHours() const;

            /**
             * RSS 0.91 and upwards
             * @return A list of \<day\>s of the week, in English, indicating
             * the days of the week when the RSS document will not be updated.
             * @see skipHours(), DayList, Day
             */
            const DayList &skipDays() const;

            /**
             * @return The numeric value read from the channel's \<ttl\>
             * element. NOTE(review): presumably the time-to-live in minutes
             * as defined by RSS 2.0; the value returned when the element is
             * absent is not visible here -- confirm against Private.
             */
            int ttl() const;

        private:
            struct Private;
            Private *d;
    };
}

#endif // LIBRSS_DOCUMENT_H
// vim: noet:ts=4
diff --git a/akregator/src/librss/enclosure.cpp b/akregator/src/librss/enclosure.cpp new file mode 100644 index 000000000..60898f1bf --- /dev/null +++ b/akregator/src/librss/enclosure.cpp @@ -0,0 +1,154 @@
/*
    This file is part of Akregator.

    Copyright (C) 2005 Frank Osterfeld <frank.osterfeld at kdemail.net>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

    As a special exception, permission is given to link this program
    with any edition of Qt, and distribute the resulting executable,
    without including the source code for Qt in the source distribution.
+*/ + +#include "enclosure.h" +#include "tools_p.h" + +#include <qdom.h> +#include <qstring.h> + +#include <kdebug.h> + +namespace RSS +{ + + +class Enclosure::EnclosurePrivate : public Shared +{ + public: + + bool isNull; + QString url; + int length; + QString type; + + bool operator==(const EnclosurePrivate &other) const + { + return ( isNull == other.isNull || (url == other.url && + length == other.length && + type == other.type)); + } +}; + + +Enclosure Enclosure::fromXML(const QDomElement& e) +{ + QString url, type; + int length = -1; + + if (e.hasAttribute(QString::fromLatin1("url"))) + url = e.attribute(QString::fromLatin1("url")); + + if (e.hasAttribute(QString::fromLatin1("length"))) + { + bool ok; + int c = e.attribute(QString::fromLatin1("length")).toInt(&ok); + length = ok ? c : -1; + } + if (e.hasAttribute(QString::fromLatin1("type"))) + type = e.attribute(QString::fromLatin1("type")); + + return Enclosure(url, length, type); +} + +QDomElement Enclosure::toXML(QDomDocument document) const +{ + QDomElement e = document.createElement(QString::fromLatin1("enclosure")); + if (!d->url.isNull()) + e.setAttribute(QString::fromLatin1("url"), d->url); + if (d->length != -1) + e.setAttribute(QString::fromLatin1("length"), QString::number(d->length)); + if (!d->type.isNull()) + e.setAttribute(QString::fromLatin1("type"), d->type); + + return e; +} + +Enclosure::Enclosure() : d(new EnclosurePrivate) +{ + d->isNull = true; + d->length = -1; +} + +Enclosure::Enclosure(const Enclosure& other) : d(0) +{ + *this = other; +} + +Enclosure::Enclosure(const QString& url, int length, const QString& type) : d(new EnclosurePrivate) +{ + d->isNull = false; + d->url = url; + d->length = length; + d->type = type; +} + +Enclosure::~Enclosure() +{ + if (d->deref()) + { + delete d; + d = 0; + } +} + +Enclosure& Enclosure::operator=(const Enclosure& other) +{ + if (d != other.d) + { + other.d->ref(); + if (d && d->deref()) + delete d; + d = other.d; + } + return *this; +} + +bool 
Enclosure::operator==(const Enclosure &other) const +{ + return *d == *other.d; +} + +bool Enclosure::isNull() const +{ + return d->isNull; +} + +QString Enclosure::url() const +{ + return d->url; +} + +int Enclosure::length() const +{ + return d->length; +} + +QString Enclosure::type() const +{ + return d->type; +} + + +} // namespace RSS diff --git a/akregator/src/librss/enclosure.h b/akregator/src/librss/enclosure.h new file mode 100644 index 000000000..d3db5d12a --- /dev/null +++ b/akregator/src/librss/enclosure.h @@ -0,0 +1,69 @@ +/* + This file is part of Akregator. + + Copyright (C) 2005 Frank Osterfeld <frank.osterfeld at kdemail.net> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, permission is given to link this program + with any edition of Qt, and distribute the resulting executable, + without including the source code for Qt in the source distribution. 
*/

#ifndef LIBRSS_RSS_ENCLOSURE_H
#define LIBRSS_RSS_ENCLOSURE_H

#include "global.h"

class QDomDocument;
class QDomElement;
class QString;

namespace RSS
{
    /**
     * Describes a file attached to an article by its URL, length and
     * mime type. Copies share one private data object.
     */
    class KDE_EXPORT Enclosure
    {
        public:

        /**
         * creates an enclosure from the "url", "length" and "type"
         * attributes of the given element
         */
        static Enclosure fromXML(const QDomElement& e);

        /**
         * creates an "enclosure" element inside @p document that carries
         * the values of this object as attributes
         */
        QDomElement toXML(QDomDocument document) const;

        /** creates a null enclosure, isNull() returns true for it */
        Enclosure();

        /** creates a copy of @p other */
        Enclosure(const Enclosure& other);

        /**
         * creates a non-null enclosure
         * @param url the URL of the enclosure
         * @param length the size of the enclosure, -1 if unknown
         * @param type the mime type of the enclosure
         */
        Enclosure(const QString& url, int length, const QString& type);

        virtual ~Enclosure();

        /** returns whether this object was created by the default constructor */
        bool isNull() const;

        Enclosure& operator=(const Enclosure& other);

        /** compares the data of both enclosures */
        bool operator==(const Enclosure& other) const;

        /** returns the URL of the enclosure */
        QString url() const;

        /** returns the size of the enclosure in bytes */
        int length() const;

        /** returns the mime type of the enclosure */
        QString type() const;

        private:

        class EnclosurePrivate;
        EnclosurePrivate* d;
    };

} // namespace RSS
#endif // LIBRSS_RSS_ENCLOSURE_H
diff --git a/akregator/src/librss/feeddetector.cpp b/akregator/src/librss/feeddetector.cpp new file mode 100644 index 000000000..a45b18776 --- /dev/null +++ b/akregator/src/librss/feeddetector.cpp @@ -0,0 +1,179 @@
/*
    This file is part of Akregator.

    Copyright (C) 2004 Teemu Rytilahti <tpr@d5k.net>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ + As a special exception, permission is given to link this program + with any edition of Qt, and distribute the resulting executable, + without including the source code for Qt in the source distribution. +*/ + +#include <qregexp.h> +#include <qstring.h> +#include <qstringlist.h> +#include <qvaluelist.h> +#include <kcharsets.h> +#include <kurl.h> + +#include "feeddetector.h" + + +using namespace RSS; + +FeedDetectorEntryList FeedDetector::extractFromLinkTags(const QString& s) +{ + //reduce all sequences of spaces, newlines etc. to one space: + QString str = s.simplifyWhiteSpace(); + + // extracts <link> tags + QRegExp reLinkTag("<[\\s]?LINK[^>]*REL[\\s]?=[\\s]?\\\"[\\s]?(ALTERNATE|SERVICE\\.FEED)[\\s]?\\\"[^>]*>", false); + + // extracts the URL (href="url") + QRegExp reHref("HREF[\\s]?=[\\s]?\\\"([^\\\"]*)\\\"", false); + // extracts type attribute + QRegExp reType("TYPE[\\s]?=[\\s]?\\\"([^\\\"]*)\\\"", false); + // extracts the title (title="title") + QRegExp reTitle("TITLE[\\s]?=[\\s]?\\\"([^\\\"]*)\\\"", false); + + int pos = 0; + int matchpos = 0; + + // get all <link> tags + QStringList linkTags; + //int strlength = str.length(); + while ( matchpos != -1 ) + { + matchpos = reLinkTag.search(str, pos); + if (matchpos != -1) + { + linkTags.append( str.mid(matchpos, reLinkTag.matchedLength()) ); + pos = matchpos + reLinkTag.matchedLength(); + } + } + + FeedDetectorEntryList list; + + for ( QStringList::Iterator it = linkTags.begin(); it != linkTags.end(); ++it ) + { + QString type; + int pos = reType.search(*it, 0); + if (pos != -1) + type = reType.cap(1).lower(); + + // we accept only type attributes indicating a feed + if ( type != "application/rss+xml" && type != "application/rdf+xml" + && type != "application/atom+xml" && type != "text/xml" ) + continue; + + QString title; + pos = reTitle.search(*it, 0); + if (pos != -1) + title = reTitle.cap(1); + + title = KCharsets::resolveEntities(title); + + QString url; + pos = reHref.search(*it, 0); + if (pos != -1) 
+ url = reHref.cap(1); + + url = KCharsets::resolveEntities(url); + + // if feed has no title, use the url as preliminary title (until feed is parsed) + if ( title.isEmpty() ) + title = url; + + if ( !url.isEmpty() ) + list.append(FeedDetectorEntry(url, title) ); + } + + + return list; +} + +QStringList FeedDetector::extractBruteForce(const QString& s) +{ + QString str = s.simplifyWhiteSpace(); + + QRegExp reAhrefTag("<[\\s]?A[^>]?HREF=[\\s]?\\\"[^\\\"]*\\\"[^>]*>", false); + + // extracts the URL (href="url") + QRegExp reHref("HREF[\\s]?=[\\s]?\\\"([^\\\"]*)\\\"", false); + + QRegExp rssrdfxml(".*(RSS|RDF|XML)", false); + + int pos = 0; + int matchpos = 0; + + // get all <a href> tags and capture url + QStringList list; + //int strlength = str.length(); + while ( matchpos != -1 ) + { + matchpos = reAhrefTag.search(str, pos); + if ( matchpos != -1 ) + { + QString ahref = str.mid(matchpos, reAhrefTag.matchedLength()); + int hrefpos = reHref.search(ahref, 0); + if ( hrefpos != -1 ) + { + QString url = reHref.cap(1); + + url = KCharsets::resolveEntities(url); + + if ( rssrdfxml.exactMatch(url) ) + list.append(url); + } + + pos = matchpos + reAhrefTag.matchedLength(); + } + } + + return list; +} + +QString FeedDetector::fixRelativeURL(const QString &s, const KURL &baseurl) +{ + QString s2=s; + KURL u; + if (KURL::isRelativeURL(s2)) + { + if (s2.startsWith("//")) + { + s2=s2.prepend(baseurl.protocol()+":"); + u=s2; + } + else if (s2.startsWith("/")) + { + KURL b2(baseurl); + b2.setPath(QString()); // delete path and query, so that only protocol://host remains + b2.setQuery(QString()); + u = KURL(b2, s2.remove(0,1)); // remove leading "/" + } + else + { + u = KURL(baseurl, s2); + } + } + else + u=s2; + + u.cleanPath(); + //kdDebug() << "AKREGATOR_PLUGIN_FIXURL: " << "url=" << s << " baseurl=" << baseurl.url() << " fixed=" << u.url() << + //endl; + return u.url(); +} diff --git a/akregator/src/librss/feeddetector.h b/akregator/src/librss/feeddetector.h new file mode 
100644 index 000000000..c27acf76e --- /dev/null +++ b/akregator/src/librss/feeddetector.h @@ -0,0 +1,80 @@
/*
    This file is part of Akregator.

    Copyright (C) 2004 Teemu Rytilahti <tpr@d5k.net>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

    As a special exception, permission is given to link this program
    with any edition of Qt, and distribute the resulting executable,
    without including the source code for Qt in the source distribution.
*/

#ifndef LIBRSS_FEEDDETECTOR_H
#define LIBRSS_FEEDDETECTOR_H

#include <qstring.h>
#include <qvaluelist.h>

class QStringList;
class KURL;

namespace RSS
{

    /** a detected feed: the URL found in the page plus a title for it */
    class FeedDetectorEntry
    {
        public:
            /** creates an entry with null url and title */
            FeedDetectorEntry() {}

            /** creates an entry from the given url and title */
            FeedDetectorEntry(const QString& url, const QString& title)
                : m_url(url), m_title(title) {}

            /** returns the url of the detected feed */
            const QString& url() const { return m_url; }

            /** returns the title of the detected feed */
            const QString& title() const { return m_title; }

        private:
            const QString m_url;
            const QString m_title;
    };

    typedef QValueList<FeedDetectorEntry> FeedDetectorEntryList;

    /** a class providing functions to detect linked feeds in HTML sources */
    class FeedDetector
    {
        public:
            /** \brief searches an HTML page for feeds listed in @c <link> tags
                @c <link> tags with @c rel attribute values @c alternate or
                @c service.feed are considered as feeds
                @param s the html source to scan (the actual source, no URI)
                @return a list containing the detected feeds
             */
            static FeedDetectorEntryList extractFromLinkTags(const QString& s);

            /** \brief searches an HTML page for slightly feed-like looking links and catches everything not running away quickly enough.
                Extracts links from @c <a @c href> tags which end with @c xml, @c rss or @c rdf
                @param s the html source to scan (the actual source, no URI)
                @return a list containing the detected feeds
             */
            static QStringList extractBruteForce(const QString& s);

            /** \brief converts a possibly relative link into an absolute URL
                @param s the link as found in the page
                @param baseurl the URL the link is resolved against if it is relative
                @return the resulting URL as string
             */
            static QString fixRelativeURL(const QString &s, const KURL &baseurl);

        private:
            /** private constructor: the class only offers static functions */
            FeedDetector() {}
    };
}

#endif //LIBRSS_FEEDDETECTOR_H
diff --git a/akregator/src/librss/global.h b/akregator/src/librss/global.h new file mode 100644 index 000000000..cec9609c7 --- /dev/null +++ b/akregator/src/librss/global.h @@ -0,0 +1,148 @@
/*
 * global.h
 *
 * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org>
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the
 * accompanying file 'COPYING'.
 */
#ifndef LIBRSS_GLOBAL_H
#define LIBRSS_GLOBAL_H

#include <kdepimmacros.h>

template <class>
class QValueList;

namespace RSS
{
    /**
     * Versions currently supported by this library. This enumeration is
     * subject to be extended in the future and used by Document::version() to
     * provide an interface to the client using which he can find out what
     * version the loaded RSS file actually is.
     */
    enum Version {
        v0_90,     ///< RSS v0.90
        v0_91,     ///< RSS v0.91
        v0_92,     ///< RSS v0.92
        v0_93,     ///< RSS v0.93
        v0_94,     ///< RSS v0.94
        v1_0,      ///< RSS v1.0
        v2_0,      ///< RSS v2.0
        vAtom_0_1, ///< Atom v0.1
        vAtom_0_2, ///< Atom v0.2
        vAtom_0_3, ///< Atom v0.3
        vAtom_1_0  ///< Atom v1.0
    };

    /**
     * Possible status values returned by the signal
     * Loader::loadingComplete().
*/
    enum Status {
        Success,       /**<
                        * Nothing went wrong so far, but you still have to check
                        * what values are returned by the classes since it's not
                        * guaranteed that the retrieved RSS markup actually
                        * complies to one of the RSS DTDs.*/
        Aborted,       /**< the loader was aborted manually
                        */
        RetrieveError, /**<
                        * Something went wrong while retrieving the RSS data,
                        * this could be a problem while resolving the host name
                        * (assuming the source file loader was used) or a
                        * problem with the program to be executed (in case the
                        * program loader was used.).*/
        ParseError     /**<
                        * The overall format of the RSS markup did not conform
                        * to XML. This only indicates that the data wasn't
                        * valid (for example, if the data returned by a
                        * DataRetriever isn't well-formed XML).
                        * @see DataRetriever */
    };

    /**
     * Possible languages which are returned by Document::language().
     */
    enum Language {
        UndefinedLanguage, /**< Unknown / undefined language */

        af,    /**< Afrikaans */                sq,    /**< Albanian */
        eu,    /**< Basque */                   be,    /**< Belarusian */
        bg,    /**< Bulgarian */                ca,    /**< Catalan */
        zh_cn, /**< Chinese (Simplified) */     zh_tw, /**< Chinese (Traditional) */
        hr,    /**< Croatian */                 cs,    /**< Czech */
        da,    /**< Danish */                   nl,    /**< Dutch */
        nl_be, /**< Dutch (Belgium) */          nl_nl, /**< Dutch (Netherlands) */
        en,    /**< English */                  en_au, /**< English (Australia) */
        en_bz, /**< English (Belize) */         en_ca, /**< English (Canada) */
        en_ie, /**< English (Ireland) */        en_jm, /**< English (Jamaica) */
        en_nz, /**< English (New Zealand) */    en_ph, /**< English (Philippines) */
        en_za, /**< English (South Africa) */   en_tt, /**< English (Trinidad) */
        en_gb, /**< English (Great Britain) */  en_us, /**< English (United States) */
        en_zw, /**< English (Zimbabwe) */       fo,    /**< Faeroese */
        fi,    /**< Finnish */                  fr,    /**< French */
        fr_be, /**< French (Belgium) */         fr_ca, /**< French (Canada) */
        fr_fr, /**< French (France) */          fr_lu, /**< French (Luxembourg) */
        fr_mc, /**< French (Monaco) */          fr_ch, /**< French (Switzerland) */
        gl,    /**< Galician */                 gd,    /**< Gaelic */
        de,    /**< German */                   de_at, /**< German (Austria) */
        de_de, /**< German (Germany) */         de_li, /**< German (Liechtenstein) */
        de_lu, /**< German (Luxembourg) */      de_ch, /**< German (Switzerland) */
        el,    /**< Greek */                    hu,    /**< Hungarian */
        is,    /**< Icelandic */                id,    /**< Indonesian */
        ga,    /**< Irish */                    it,    /**< Italian */
        it_it, /**< Italian (Italy) */          it_ch, /**< Italian (Switzerland) */
        ja,    /**< Japanese */                 ko,    /**< Korean */
        mk,    /**< Macedonian */               no,    /**< Norwegian */
        pl,    /**< Polish */                   pt,    /**< Portuguese */
        pt_br, /**< Portuguese (Brazil) */      pt_pt, /**< Portuguese (Portugal) */
        ro,    /**< Romanian */                 ro_mo, /**< Romanian (Moldova) */
        ro_ro, /**< Romanian (Romania) */       ru,    /**< Russian */
        ru_mo, /**< Russian (Moldova) */        ru_ru, /**< Russian (Russia) */
        sr,    /**< Serbian */                  sk,    /**< Slovak */
        sl,    /**< Slovenian */                es,    /**< Spanish */
        es_ar, /**< Spanish (Argentina) */      es_bo, /**< Spanish (Bolivia) */
        es_cl, /**< Spanish (Chile) */          es_co, /**< Spanish (Colombia) */
        es_cr, /**< Spanish (Costa Rica) */     es_do, /**< Spanish (Dominican Rep.) */
        es_ec, /**< Spanish (Ecuador) */        es_sv, /**< Spanish (El Salvador) */
        es_gt, /**< Spanish (Guatemala) */      es_hn, /**< Spanish (Honduras) */
        es_mx, /**< Spanish (Mexico) */         es_ni, /**< Spanish (Nicaragua) */
        es_pa, /**< Spanish (Panama) */         es_py, /**< Spanish (Paraguay) */
        es_pe, /**< Spanish (Peru) */           es_pr, /**< Spanish (Puerto Rico) */
        es_es, /**< Spanish (Spain) */          es_uy, /**< Spanish (Uruguay) */
        es_ve, /**< Spanish (Venezuela) */      sv,    /**< Swedish */
        sv_fi, /**< Swedish (Finland) */        sv_se, /**< Swedish (Sweden) */
        tr,    /**< Turkish */                  uk     /**< Ukrainian */
    };

    /**
     * Possible values contained in a DayList.
     * UndefinedDay is 0; the week days are numbered from Monday = 1
     * to Sunday = 7.
     */
    enum Day {
        UndefinedDay,
        Monday = 1, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday
    };

    /**
     * The feed format family of a document.
     */
    enum Format {
        UnknownFormat,
        AtomFeed,
        RSSFeed
    };

    /**
     * This type is used by Document::skipDays().
+ */ + typedef QValueList<Day> DayList; + + /** + * This type is used by Document::skipHours(). + */ + typedef QValueList<unsigned short> HourList; +} + +#endif // LIBRSS_GLOBAL_H +// vim: noet:ts=4 diff --git a/akregator/src/librss/image.cpp b/akregator/src/librss/image.cpp new file mode 100644 index 000000000..33e1544ae --- /dev/null +++ b/akregator/src/librss/image.cpp @@ -0,0 +1,167 @@ +/* + * image.cpp + * + * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org> + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the + * accompanying file 'COPYING'. + */ +#include "image.h" +#include "tools_p.h" + +#include <kio/job.h> +#include <kurl.h> + +#include <qbuffer.h> +#include <qdom.h> +#include <qpixmap.h> + +using namespace RSS; + +// State shared by all copies of an Image; lifetime is managed through ref()/deref(). +// pixmapBuffer and job are only non-NULL while a download started by getPixmap() is pending. +struct Image::Private : public Shared +{ + Private() : height(31), width(88), pixmapBuffer(NULL), job(NULL) + { } + + QString title; + KURL url; + KURL link; + QString description; + unsigned int height; + unsigned int width; + QBuffer *pixmapBuffer; + KIO::Job *job; +}; + +Image::Image() : QObject(), d(new Private) +{ +} + +Image::Image(const Image &other) : QObject(), d(0) +{ + // d starts out as 0 so that operator=() has nothing to release. + *this = other; +} + +Image::Image(const QDomNode &node) : QObject(), d(new Private) +{ + // Each property keeps its default unless extractNode() yields a non-null text. + QString elemText; + + if (!(elemText = extractNode(node, QString::fromLatin1("title"))).isNull()) + d->title = elemText; + if (!(elemText = extractNode(node, QString::fromLatin1("url"))).isNull()) + d->url = elemText; + if (!(elemText = extractNode(node, QString::fromLatin1("link"))).isNull()) + d->link = elemText; + if (!(elemText = extractNode(node, QString::fromLatin1("description"))).isNull()) + d->description = elemText; + if (!(elemText = extractNode(node, QString::fromLatin1("height"))).isNull()) + d->height = elemText.toUInt(); + if (!(elemText = extractNode(node,
QString::fromLatin1("width"))).isNull()) + d->width = elemText.toUInt(); +} + +Image::~Image() +{ + // Only the last owner of the shared data frees it. + if (d->deref()) + { + delete d->pixmapBuffer; + d->pixmapBuffer=0L; + delete d; + } +} + +QString Image::title() const +{ + return d->title; +} + +const KURL &Image::url() const +{ + return d->url; +} + +const KURL &Image::link() const +{ + return d->link; +} + +QString Image::description() const +{ + return d->description; +} + +unsigned int Image::height() const +{ + return d->height; +} + +unsigned int Image::width() const +{ + return d->width; +} + +void Image::getPixmap() +{ + // Ignore subsequent calls if we didn't finish the previous download. + if (d->pixmapBuffer) + return; + + d->pixmapBuffer = new QBuffer; + d->pixmapBuffer->open(IO_WriteOnly); + + d->job = KIO::get(d->url, false, false); + connect(d->job, SIGNAL(data(KIO::Job *, const QByteArray &)), + this, SLOT(slotData(KIO::Job *, const QByteArray &))); + connect(d->job, SIGNAL(result(KIO::Job *)), this, SLOT(slotResult(KIO::Job *))); +} + +void Image::slotData(KIO::Job *, const QByteArray &data) +{ + // Collect the downloaded chunks until slotResult() is called. + d->pixmapBuffer->writeBlock(data.data(), data.size()); +} + +void Image::slotResult(KIO::Job *job) +{ + // The job deletes itself once result() has been delivered; forget it now + // so that a later abort() does not call kill() on a dangling pointer. + d->job = NULL; + + // A null pixmap is emitted if the download failed. + QPixmap pixmap; + if (!job->error()) + pixmap = QPixmap(d->pixmapBuffer->buffer()); + emit gotPixmap(pixmap); + + delete d->pixmapBuffer; + d->pixmapBuffer = NULL; +} + +void Image::abort() +{ + if (d->job) + { + // kill(true) is quiet: slotResult() will not run for this job. + d->job->kill(true); + d->job = NULL; + + // Drop the partial data here, otherwise getPixmap() would keep + // treating the aborted download as still pending. + delete d->pixmapBuffer; + d->pixmapBuffer = NULL; + } +} + +Image &Image::operator=(const Image &other) +{ + if (this != &other) { + // Take the new reference before releasing the old one. + other.d->ref(); + if (d && d->deref()) + { + // Last owner of the old data: free the buffer as well, like ~Image() does. + delete d->pixmapBuffer; + delete d; + } + d = other.d; + } + return *this; +} + +bool Image::operator==(const Image &other) const +{ + // Compares the descriptive properties only, not the pixmap data. + return d->title == other.title() && + d->url == other.url() && + d->description == other.description() && + d->height == other.height() && + d->width == other.width() && + d->link == other.link(); +} + +#include "image.moc" +// vim:noet:ts=4 diff --git a/akregator/src/librss/image.h b/akregator/src/librss/image.h new file mode 100644
index 000000000..74197edb6 --- /dev/null +++ b/akregator/src/librss/image.h @@ -0,0 +1,173 @@ +/* + * image.h + * + * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org> + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the + * accompanying file 'COPYING'. + */ +#ifndef LIBRSS_IMAGE_H +#define LIBRSS_IMAGE_H + +#include "global.h" + +#include <qobject.h> + +class QDomNode; + +namespace KIO +{ + class Job; +} +class KURL; + +namespace RSS +{ + /** + * Represents an image as stored in a RSS file. You don't have to + * instantiate one of these yourself, the common way to access instances + * is via Document::image(). + * @see Document::image() + */ + class KDE_EXPORT Image : public QObject + { + Q_OBJECT + public: + /** + * Default constructor. + */ + Image(); + + /** + * Copy constructor. + * @param other The Image object to copy. + */ + Image(const Image &other); + + /** + * Constructs an Image from a piece of RSS markup. + * @param node A QDomNode which references the DOM leaf to be used + * for constructing the Image. + */ + Image(const QDomNode &node); + + /** + * Assignment operator. + * @param other The Image object to clone. + * @return A reference to the cloned Image object. + */ + Image &operator=(const Image &other); + + /** + * Compares two images. Two images are considered identical if + * their properties (title, description, link etc.) are identical. + * Note that this does not include the actual pixmap data! + * @param other The image to compare with. + * @return Whether the two images are equal. + */ + bool operator==(const Image &other) const; + + /** + * Convenience method. Simply calls !operator==(). + * @param other The image to compare with. + * @return Whether the two images are unequal.
+ */ + bool operator!=(const Image &other) const { return !operator==(other); } + + /** + * Destructor. + */ + virtual ~Image(); + + /** + * RSS 0.90 and upwards + * @return The 'caption' of this image, or QString::null if no + * caption is available. + */ + QString title() const; + + /** + * RSS 0.90 and upwards + * @return The URL pointing to the file containing the graphic + * data (GIF, JPEG or PNG format), or an empty KURL if no URL + * is available. You can use getPixmap() and gotPixmap() to have + * the Image download the pixmap data itself. + * Note that the RSS 0.91 Specification dictates that URLs not + * starting with "http://" or "ftp://" are considered invalid. + */ + const KURL &url() const; + + /** + * RSS 0.90 and upwards + * @return A link to some resource, or an empty KURL if no link is + * available. Clicking on the image should lead the user to the + * resource referenced by this URL. + * Note that the RSS 0.91 Specification dictates that URLs not + * starting with "http://" or "ftp://" are considered invalid. + */ + const KURL &link() const; + + /** + * RSS 0.91 and upwards + * @return A description of what this picture shows, or + * QString::null if no description is available. Useful for + * people who deactivated images but want or need to know what is + * shown. + */ + QString description() const; + + /** + * RSS 0.91 and upwards + * @return The height in pixels as reported by the news site, the + * default value is 31 pixels. The RSS 0.91 Specification requires + * this value to be between 1 and 400. + * NOTE(review): '0' presumably means the reported height was not a + * valid number, while a missing height keeps the default -- confirm. + * This is merely provided + * for completeness, you should not rely on this value but rather + * check what height the QPixmap as returned by gotPixmap() + * reports. + */ + unsigned int height() const; + + /** + * RSS 0.91 and upwards + * @return The width in pixels as reported by the news site, the + * default value is 88 pixels.
The RSS 0.91 Specification requires + * this value to be between 1 and 144. + * This is merely provided for completeness, you should not rely + * on this value but rather check what width the QPixmap as + * returned by gotPixmap() reports. + */ + unsigned int width() const; + + /** + * Makes the image download the image data as referenced by the + * URL returned by url(). You have to connect to the signal + * gotPixmap() first and then call getPixmap(). + * Calls made while a previous download has not finished are ignored. + */ + void getPixmap(); + /** Kills the download job started by getPixmap(), if there is one. */ void abort(); + + signals: + /** + * Emitted when this Image is done downloading the actual graphics + * data as referenced by the URL returned by url(). You can trigger + * this download by calling getPixmap(). + * @param pixmap The pixmap as constructed from the data referenced + * by the URL returned by url(). A null pixmap is passed if the + * download job reported an error. + */ + void gotPixmap(const QPixmap &pixmap); + + private slots: + /** Appends a chunk of downloaded data to the internal buffer. */ void slotData(KIO::Job *job, const QByteArray &data); + /** Builds the pixmap from the buffered data and emits gotPixmap(). */ void slotResult(KIO::Job *job); + + private: + struct Private; + Private *d; + }; +} + +#endif // LIBRSS_IMAGE_H +// vim: noet:ts=4 diff --git a/akregator/src/librss/librss.doxyfile b/akregator/src/librss/librss.doxyfile new file mode 100644 index 000000000..c81ac168e --- /dev/null +++ b/akregator/src/librss/librss.doxyfile @@ -0,0 +1,921 @@ +# Doxyfile 1.2.14 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# General configuration options +#--------------------------------------------------------------------------- + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project.
+ +PROJECT_NAME = librss + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = 0.1 + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = doc/ + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Brazilian, Chinese, Croatian, Czech, Danish, Dutch, Finnish, French, +# German, Greek, Hungarian, Italian, Japanese, Korean, Norwegian, Polish, +# Portuguese, Romanian, Russian, Slovak, Slovene, Spanish and Swedish. + +OUTPUT_LANGUAGE = English + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. 
+ +EXTRACT_LOCAL_CLASSES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = YES + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these class will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all inherited +# members of a class in the documentation of that class as if those members were +# ordinary class members. Constructors, destructors and assignment operators of +# the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. 
If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = NO + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. It is allowed to use relative paths in the argument list. + +STRIP_FROM_PATH = + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower case letters. If set to YES upper case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this.
+ +VERBATIM_HEADERS = YES + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like the Qt-style comments (thus requiring an +# explicit @brief command for a brief description). + +JAVADOC_AUTOBRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# reimplements. + +INHERIT_DOCS = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 4 + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list.
This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consist of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. +# For instance some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. 
+ +SHOW_USED_FILES = YES + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = . + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. 
If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx *.hpp +# *.h++ *.idl *.odl + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or directories +# that are symbolic links (a Unix filesystem feature) are excluded from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. + +EXCLUDE_PATTERNS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command).
+ +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. + +INPUT_FILTER = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse. + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# If the REFERENCED_BY_RELATION tag is set to YES (the default) +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES (the default) +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. 
Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user defined cascading +# style sheet that is used by each HTML page. 
It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet + +HTML_STYLESHEET = + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the Html help documentation and to the tree view. + +TOC_EXPAND = NO + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be +# generated containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript and frames is required (for instance Mozilla, Netscape 4.0+, +# or Internet explorer 4.0+). 
Note that for large projects the tree generation +# can take a very long time. In such cases it is better to disable this feature. +# Windows users are probably better off using the HTML help feature. + +GENERATE_TREEVIEW = YES + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! 
+ +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = NO + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = NO + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimised for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. 
+# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO.
+ +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_XML = NO + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. 
+ +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the include files +# in the INCLUDE_PATH (see below) will be searched if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line and do not end with a semicolon. Such function macros are typically +# used for boiler-plate code, and will confuse the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration options related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES tag can be used to specify one or more tagfiles. 
+ +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate an inheritance diagram (in Html, RTF and LaTeX) for classes with base or +# super classes. Setting the tag to NO turns the diagrams off. Note that this +# option is superseded by the HAVE_DOT option below. This is only a fallback. It is +# recommended to install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = YES + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. 
+ +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = YES + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are gif, jpg, and png. +# If left blank gif will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found on the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). 
+ +DOTFILE_DIRS = + +# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_WIDTH = 1024 + +# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_HEIGHT = 1024 + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES + +#--------------------------------------------------------------------------- +# Configuration options related to the search engine +#--------------------------------------------------------------------------- + +# The SEARCHENGINE tag specifies whether or not a search engine should be +# used. If set to NO the values of all tags below this one will be ignored. + +SEARCHENGINE = NO + +# The CGI_NAME tag should be the name of the CGI script that +# starts the search engine (doxysearch) with the correct parameters. +# A script with this name will be generated by doxygen. + +CGI_NAME = search.cgi + +# The CGI_URL tag should be the absolute URL to the directory where the +# cgi binaries are located. See the documentation of your http daemon for +# details. 
+ +CGI_URL = + +# The DOC_URL tag should be the absolute URL to the directory where the +# documentation is located. If left blank the absolute path to the +# documentation, with file:// prepended to it, will be used. + +DOC_URL = + +# The DOC_ABSPATH tag should be the absolute path to the directory where the +# documentation is located. If left blank the directory on the local machine +# will be used. + +DOC_ABSPATH = + +# The BIN_ABSPATH tag must point to the directory where the doxysearch binary +# is installed. + +BIN_ABSPATH = /usr/local/bin/ + +# The EXT_DOC_PATHS tag can be used to specify one or more paths to +# documentation generated for other projects. This allows doxysearch to search +# the documentation for these projects as well. + +EXT_DOC_PATHS = diff --git a/akregator/src/librss/librss.h b/akregator/src/librss/librss.h new file mode 100644 index 000000000..b99556b1c --- /dev/null +++ b/akregator/src/librss/librss.h @@ -0,0 +1,24 @@ +/* + * librss.h + * + * Copyright (c) 2003 Frerich Raabe <raabe@kde.org> + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the + * accompanying file 'COPYING'. 
+ */ +#ifndef LIBRSS_LIBRSS_H +#define LIBRSS_LIBRSS_H + +#include "article.h" +#include "category.h" +#include "document.h" +#include "enclosure.h" +#include "global.h" +#include "image.h" +#include "loader.h" +#include "textinput.h" + +#endif // LIBRSS_LIBRSS_H +// vim: noet:ts=4 diff --git a/akregator/src/librss/loader.cpp b/akregator/src/librss/loader.cpp new file mode 100644 index 000000000..d63d17676 --- /dev/null +++ b/akregator/src/librss/loader.cpp @@ -0,0 +1,413 @@ +/* + * loader.cpp + * + * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org> + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the + * accompanying file 'COPYING'. + */ +#include "loader.h" +#include "document.h" +#include "feeddetector.h" + +#include <kio/job.h> +#include <kprocess.h> +#include <kstaticdeleter.h> +#include <kurl.h> +#include <kdebug.h> + +#include <qdom.h> +#include <qbuffer.h> +#include <qregexp.h> +#include <qstring.h> +#include <qstringlist.h> +#include <qtimer.h> + +using namespace RSS; + +DataRetriever::DataRetriever() +{ +} + +DataRetriever::~DataRetriever() +{ +} + +class FileRetriever::Private +{ + public: + + Private() + : buffer(NULL), + lastError(0), job(NULL) + { + } + + ~Private() + { + delete buffer; + } + + QBuffer *buffer; + int lastError; + KIO::Job *job; + static KStaticDeleter<QString> userAgentsd; + static QString* userAgent; +}; + +KStaticDeleter<QString> FileRetriever::Private::userAgentsd; +QString* FileRetriever::Private::userAgent = 0L; +FileRetriever::FileRetriever() + : d(new Private) +{ +} + +FileRetriever::~FileRetriever() +{ + delete d; +} + +bool FileRetriever::m_useCache = true; + +QString FileRetriever::userAgent() +{ + if (Private::userAgent == 0L) + FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new QString); + return 
*Private::userAgent; +} + +void FileRetriever::setUserAgent(const QString &ua) +{ + if (Private::userAgent == 0L) + FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new QString); + (*Private::userAgent) = ua; +} + +void FileRetriever::setUseCache(bool enabled) +{ + m_useCache = enabled; +} + +void FileRetriever::retrieveData(const KURL &url) +{ + if (d->buffer) + return; + + d->buffer = new QBuffer; + d->buffer->open(IO_WriteOnly); + + KURL u=url; + + if (u.protocol()=="feed") + u.setProtocol("http"); + + d->job = KIO::get(u, false, false); + d->job->addMetaData("cache", m_useCache ? "refresh" : "reload"); + + QString ua = userAgent(); + if (!ua.isEmpty()) + d->job->addMetaData("UserAgent", ua); + + + QTimer::singleShot(1000*90, this, SLOT(slotTimeout())); + + connect(d->job, SIGNAL(data(KIO::Job *, const QByteArray &)), + SLOT(slotData(KIO::Job *, const QByteArray &))); + connect(d->job, SIGNAL(result(KIO::Job *)), SLOT(slotResult(KIO::Job *))); + connect(d->job, SIGNAL(permanentRedirection(KIO::Job *, const KURL &, const KURL &)), + SLOT(slotPermanentRedirection(KIO::Job *, const KURL &, const KURL &))); +} + +void FileRetriever::slotTimeout() +{ + abort(); + + delete d->buffer; + d->buffer = NULL; + + d->lastError = KIO::ERR_SERVER_TIMEOUT; + + emit dataRetrieved(QByteArray(), false); +} + +int FileRetriever::errorCode() const +{ + return d->lastError; +} + +void FileRetriever::slotData(KIO::Job *, const QByteArray &data) +{ + d->buffer->writeBlock(data.data(), data.size()); +} + +void FileRetriever::slotResult(KIO::Job *job) +{ + QByteArray data = d->buffer->buffer(); + data.detach(); + + delete d->buffer; + d->buffer = NULL; + + d->lastError = job->error(); + emit dataRetrieved(data, d->lastError == 0); +} + +void FileRetriever::slotPermanentRedirection(KIO::Job *, const KURL &, const KURL &newUrl) +{ + emit permanentRedirection(newUrl); +} + +void FileRetriever::abort() +{ + if (d->job) + { + d->job->kill(true); + d->job = NULL; + } +} + 
+struct OutputRetriever::Private +{ + Private() : process(NULL), + buffer(NULL), + lastError(0) + { + } + + ~Private() + { + delete process; + delete buffer; + } + + KShellProcess *process; + QBuffer *buffer; + int lastError; +}; + +OutputRetriever::OutputRetriever() : + d(new Private) +{ +} + +OutputRetriever::~OutputRetriever() +{ + delete d; +} + +void OutputRetriever::retrieveData(const KURL &url) +{ + // Ignore subsequent calls if we didn't finish the previous job yet. + if (d->buffer || d->process) + return; + + d->buffer = new QBuffer; + d->buffer->open(IO_WriteOnly); + + d->process = new KShellProcess(); + connect(d->process, SIGNAL(processExited(KProcess *)), + SLOT(slotExited(KProcess *))); + connect(d->process, SIGNAL(receivedStdout(KProcess *, char *, int)), + SLOT(slotOutput(KProcess *, char *, int))); + *d->process << url.path(); + d->process->start(KProcess::NotifyOnExit, KProcess::Stdout); +} + +int OutputRetriever::errorCode() const +{ + return d->lastError; +} + +void OutputRetriever::slotOutput(KProcess *, char *data, int length) +{ + d->buffer->writeBlock(data, length); +} + +void OutputRetriever::slotExited(KProcess *p) +{ + if (!p->normalExit()) + d->lastError = p->exitStatus(); + + QByteArray data = d->buffer->buffer(); + data.detach(); + + delete d->buffer; + d->buffer = NULL; + + delete d->process; + d->process = NULL; + + emit dataRetrieved(data, p->normalExit() && p->exitStatus() == 0); +} + +struct Loader::Private +{ + Private() : retriever(NULL), + lastError(0) + { + } + + ~Private() + { + delete retriever; + } + + DataRetriever *retriever; + int lastError; + KURL discoveredFeedURL; + KURL url; +}; + +Loader *Loader::create() +{ + return new Loader; +} + +Loader *Loader::create(QObject *object, const char *slot) +{ + Loader *loader = create(); + connect(loader, SIGNAL(loadingComplete(Loader *, Document, Status)), + object, slot); + return loader; +} + +Loader::Loader() : d(new Private) +{ +} + +Loader::~Loader() +{ + delete d; +} + 
+void Loader::loadFrom(const KURL &url, DataRetriever *retriever) +{ + if (d->retriever != NULL) + return; + + d->url=url; + d->retriever = retriever; + + connect(d->retriever, SIGNAL(dataRetrieved(const QByteArray &, bool)), + this, SLOT(slotRetrieverDone(const QByteArray &, bool))); + + d->retriever->retrieveData(url); +} + +int Loader::errorCode() const +{ + return d->lastError; +} + +void Loader::abort() +{ + if (d && d->retriever) + { + d->retriever->abort(); + delete d->retriever; + d->retriever=NULL; + } + emit loadingComplete(this, QDomDocument(), Aborted); + delete this; +} + +const KURL &Loader::discoveredFeedURL() const +{ + return d->discoveredFeedURL; +} + +void Loader::slotRetrieverDone(const QByteArray &data, bool success) +{ + d->lastError = d->retriever->errorCode(); + + delete d->retriever; + d->retriever = NULL; + + Document rssDoc; + Status status = Success; + + if (success) { + QDomDocument doc; + + /* Some servers insert whitespace before the <?xml...?> declaration. + * QDom doesn't tolerate that (and it's right, that's invalid XML), + * so we strip that. 
+ */ + + const char *charData = data.data(); + int len = data.count(); + + while (len && QChar(*charData).isSpace()) { + --len; + ++charData; + } + + if ( len > 3 && QChar(*charData) == QChar(0357) ) { // 0357 0273 0277 + len -= 3; + charData += 3; + } + QByteArray tmpData; + tmpData.setRawData(charData, len); + + if (doc.setContent(tmpData)) + { + rssDoc = Document(doc); + if (!rssDoc.isValid()) + { + discoverFeeds(tmpData); + status = ParseError; + } + } + else + { + discoverFeeds(tmpData); + status = ParseError; + } + + tmpData.resetRawData(charData, len); + } else + status = RetrieveError; + + emit loadingComplete(this, rssDoc, status); + + delete this; +} + +void Loader::discoverFeeds(const QByteArray &data) +{ + QString str = QString(data).simplifyWhiteSpace(); + + QStringList feeds; + + FeedDetectorEntryList list = FeedDetector::extractFromLinkTags(str); + + for (FeedDetectorEntryList::ConstIterator it = list.begin(); it != list.end(); ++it) + { + feeds += (*it).url(); + } + + if (list.isEmpty()) + feeds = FeedDetector::extractBruteForce(str); + + QString feed = feeds.first(); + QString host = d->url.host(); + KURL testURL; + // loop through, prefer feeds on same host + QStringList::Iterator end( feeds.end() ); + for ( QStringList::Iterator it = feeds.begin(); it != end; ++it) + { + testURL=*it; + if (testURL.host() == host) + { + feed = *it; + break; + } + } + + d->discoveredFeedURL = feed.isNull() ? 
QString() : FeedDetector::fixRelativeURL(feed, d->url); +} + +#include "loader.moc" +// vim:noet:ts=4 diff --git a/akregator/src/librss/loader.h b/akregator/src/librss/loader.h new file mode 100644 index 000000000..d60bad744 --- /dev/null +++ b/akregator/src/librss/loader.h @@ -0,0 +1,341 @@ +/* + * loader.h + * + * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org> + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the + * accompanying file 'COPYING'. + */ +#ifndef LIBRSS_LOADER_H +#define LIBRSS_LOADER_H + +#include "global.h" + +class KURL; + +#include <qobject.h> + +namespace KIO +{ + class Job; +} +class KProcess; + +namespace RSS +{ + class Document; + + /** + * Abstract baseclass for all data retriever classes. Subclass this to add + * a new retrieval algorithm which can then be plugged into the RSS loader. + * @see Loader, FileRetriever, OutputRetriever + */ + class KDE_EXPORT DataRetriever : public QObject + { + Q_OBJECT + public: + /** + * Default constructor. + */ + DataRetriever(); + + /** + * Destructor. + */ + virtual ~DataRetriever(); + + /** + * Retrieve data from the given URL. This method is supposed to get + * reimplemented by subclasses. It will be called by the Loader + * class in case it needs to retrieve the data. + * @see Loader::loadFrom() + */ + virtual void retrieveData(const KURL &url) = 0; + + /** + * @return An error code which might give a more precise information + * about what went wrong in case the 'success' flag returned with + * the dataRetrieved() signal was 'false'. Note that the meaning of + * the returned integer depends on the actual data retriever. + */ + virtual int errorCode() const = 0; + + virtual void abort() = 0; + signals: + /** + * Emit this signal to tell the Loader class that the retrieval + * process was finished. 
+ * @param data Should contain the retrieved data and will get + * parsed by the Loader class. + * @param success Indicates whether there were any problems during + * the retrieval process. Pass 'true' to indicate that everything + * went seamlessly, 'false' to tell the Loader that something went + * wrong and that the data parameter might contain no or invalid + * data. + */ + void dataRetrieved(const QByteArray &data, bool success); + + private: + DataRetriever(const DataRetriever &other); + DataRetriever &operator=(const DataRetriever &other); + }; + + /** + * Implements a file retriever, to be used with Loader::loadFrom(). + * @see DataRetriever, Loader::loadFrom() + */ + class KDE_EXPORT FileRetriever : public DataRetriever + { + Q_OBJECT + public: + /** + * Default constructor. + */ + FileRetriever(); + + /** + * Destructor. + */ + virtual ~FileRetriever(); + + /** + * Downloads the file referenced by the given URL and passes its + * contents on to the Loader. + * @param url An URL referencing a file which is assumed to + * reference valid XML. + * @see Loader::loadFrom() + */ + virtual void retrieveData(const KURL &url); + + /** + * @return The error code for the last process of retrieving data. + * The returned numbers correspond directly to the error codes + * <a href="http://developer.kde.org/documentation/library/cvs-api/classref/kio/KIO.html#Error">as + * defined by KIO</a>. + */ + virtual int errorCode() const; + + virtual void abort(); + + static void setUseCache(bool enabled); + static void setUserAgent(const QString &ua); + static QString userAgent(); + + signals: + /** + * Signals a permanent redirection. The redirection itself is + * handled internally, so you don't need to call Loader::loadFrom() + * with the new URL. This signal is useful in case you want to + * notify the user, or adjust a database entry. 
+ * @see Loader::loadFrom() + */ + void permanentRedirection(const KURL &url); + + protected slots: + void slotTimeout(); + + private slots: + void slotData(KIO::Job *job, const QByteArray &data); + void slotResult(KIO::Job *job); + void slotPermanentRedirection(KIO::Job *job, const KURL &fromUrl, + const KURL &toUrl); + + private: + static bool m_useCache; + + FileRetriever(const FileRetriever &other); + FileRetriever &operator=(const FileRetriever &other); + + struct Private; + Private *d; + }; + + /** + * Implements a data retriever which executes a program and stores the data returned + * by the program on stdout. To be used with Loader::loadFrom(). + * @see DataRetriever, Loader::loadFrom() + */ + class OutputRetriever : public DataRetriever + { + Q_OBJECT + public: + /** + * Default constructor. + */ + OutputRetriever(); + + /** + * Destructor. + */ + virtual ~OutputRetriever(); + + /** + * Executes the program referenced by the given URL and retrieves + * the data which the program prints to stdout. + * @param url An URL which is supposed to reference an executable + * file. + * @see Loader::loadFrom() + */ + virtual void retrieveData(const KURL &url); + + /** + * @return The error code for the last process of retrieving data. + * 0 is returned in case there was no error, otherwise an error + * code which depends on the particular program which was run is + * returned. + */ + virtual int errorCode() const; + + virtual void abort() {} + + private slots: + void slotOutput(KProcess *process, char *data, int length); + void slotExited(KProcess *process); + + private: + OutputRetriever(const OutputRetriever &other); + OutputRetriever &operator=(const OutputRetriever &other); + + struct Private; + Private *d; + }; + + /** + * This class is the preferred way of loading RSS files. 
Usage is very + * straightforward: + * + * \code + * Loader *loader = Loader::create(); + * connect(loader, SIGNAL(loadingComplete(Loader *, Document, Status)), + * this, SLOT(slotLoadingComplete(Loader *, Document, Status))); + * loader->loadFrom("http://www.blah.org/foobar.rdf", new FileRetriever); + * \endcode + * + * This creates a Loader object, connects its loadingComplete() signal to + * your custom slot and then makes it load the file + * 'http://www.blah.org/foobar.rdf' using the FileRetriever. You could've + * done something like this as well: + * + * \code + * // create the Loader, connect its signal... + * loader->loadFrom("/home/myself/some-script.py", new OutputRetriever); + * \endcode + * + * That'd make the Loader use another algorithm for retrieving the RSS data; + * 'OutputRetriever' will make it execute the script + * '/home/myself/some-script.py' and assume whatever that script prints to + * stdout is RSS markup. This is e.g. handy for conversion scripts, which + * download an HTML file and convert its contents into RSS markup. + * + * No matter what kind of retrieval algorithm you employ, your + * 'slotLoadingComplete' method might look like this: + * + * \code + * void MyClass::slotLoadingComplete(Loader *loader, Document doc, Status status) + * { + * // Note that Loader::~Loader() is private, so you cannot delete Loader instances. + * // You don't need to do that anyway since Loader instances delete themselves. + * + * if (status != RSS::Success) + * return; + * + * QString title = doc.title(); + * // do whatever you want with the information. + * } + * \endcode + * + * \note You have to create a copy of the passed Document instance in + * case you want/need to use it after the slot attached to the + * loadingComplete signal goes out of scope. This is e.g. the case if you + * intend to call getPixmap() on Document::image()! 
+ */ + class KDE_EXPORT Loader : public QObject + { + Q_OBJECT + friend class someClassWhichDoesNotExist; + public: + /** + * Constructs a Loader instance. This is pretty much what the + * default constructor would do, except that it ensures that all + * Loader instances have been allocated on the heap (this is + * required so that Loaders can delete themselves safely after they + * emitted the loadingComplete() signal). + * @return A pointer to a new Loader instance. + */ + static Loader *create(); + + /** + * Convenience method. Does the same as the above method except that + * it also does the job of connecting the loadingComplete() signal + * to the given slot for you. + * @param object A QObject which features the specified slot + * @param slot Which slot to connect to. + */ + static Loader *create(QObject *object, const char *slot); + + /** + * Loads the RSS file referenced by the given URL using the + * specified retrieval algorithm. Make sure that you connected + * to the loadingComplete() signal before calling this method so + * that you're guaranteed to get notified when the loading finished. + * \note A Loader object cannot load from multiple URLs simultaneously; + * consequently, subsequent calls to loadFrom will be discarded + * silently, only the first loadFrom request will be executed. + * @param url An URL referencing the input file. + * @param retriever A subclass of DataRetriever which implements a + * specialized retrieval behaviour. Note that the ownership of the + * retriever is transferred to the Loader, i.e. the Loader will + * delete it when it doesn't need it anymore. + * @see DataRetriever, Loader::loadingComplete() + */ + void loadFrom(const KURL &url, DataRetriever *retriever); + + /** + * Retrieves the error code of the last loading process (if any), + * as reported by the employed data retriever. 
+ */ + int errorCode() const; + + const KURL &discoveredFeedURL() const; + + void abort(); + + signals: + /** + * This signal gets emitted when the loading process triggered by + * calling loadFrom() finished. + * @param loader A pointer pointing to the loader object which + * emitted this signal; this is handy in case you connect multiple + * loaders to a single slot. + * @param doc In case status is Success, this parameter holds the + * parsed RSS file. In case it's RetrieveError, you should query + * loader->errorCode() for the actual error code. + * Note that you have to create a copy of the passed Document + * instance in case you want/need to use it after the slot attached + * to the loadingComplete signal goes out of scope. This is e.g. + * the case if you intend to call getPixmap() on Document::image()! + * @param status A status byte telling whether there were any problems + * while retrieving or parsing the data. + * @see Document, Status + */ + void loadingComplete(Loader *loader, Document doc, Status status); + + private slots: + void slotRetrieverDone(const QByteArray &data, bool success); + + private: + Loader(); + Loader(const Loader &other); + Loader &operator=(const Loader &other); + ~Loader(); + void discoverFeeds(const QByteArray &data); + + struct Private; + Private *d; + }; +} + +#endif // LIBRSS_LOADER_H +// vim: noet:ts=4 diff --git a/akregator/src/librss/rss-faq.html b/akregator/src/librss/rss-faq.html new file mode 100644 index 000000000..480b19f98 --- /dev/null +++ b/akregator/src/librss/rss-faq.html @@ -0,0 +1,396 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html> +<head> +<title>RSS Headline Syndication - Frequently Asked Questions for Content Providers</title> +<meta http-equiv="Content-Type" content="text/html; charset=windows-1252" /> +<meta name="description" content="RSS Headline Syndication - Frequently Asked Questions for Content Providers" /> 
+<meta name="keywords" content="rss faq, rss, faq, rich site summary, rdf site summary, really simple syndication, headline syndication, syndication" /> +<meta name="robots" content="index,follow" /> +<style type="text/css"> +<!-- +td.content +{ +font-family: Verdana, Arial, Helvetica, sans-serif; +color: #000000; +font-size: 10pt; +font-weight: normal; +} +td.contentbold +{ +font-family: Verdana, Arial, Helvetica, sans-serif; +color: #000000; +font-size: 10pt; +font-weight: bold; +} +td.contentsmall +{ +font-family: Verdana, Arial, Helvetica, sans-serif; +color: #000000; +font-size: 8pt; +font-weight: normal; +} +--> +</style> +</head> +<body bgcolor="#ffffff"> +<p align="right"> +<i> + This document was taken from <a href="http://www.purplepages.ie/rss/">http://www.purplepages.ie/rss/</a>.<br/> + <tt><a href="mailto:raabe@kde.org">Frerich Raabe</a></tt> +</i> +</p> +<table border="0" width="620" cellpadding="1" cellspacing="0"> + <tr> + <td width="620" class="contentbold" colspan="2">RSS Headline Syndication<br /><br /> + <a name="top">Frequently Asked Questions for Content Providers</a> + </td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"> + <br /><a href="#whatishs">1. What is headline syndication?</a><br /> + <a href="#rss">2. What is RSS?</a><br /> + <a href="#whyrss">3. Why syndicate your headlines with RSS?</a><br /> + <a href="#howrss">4. How can I create an RSS file?</a><br /> + <a href="#promoterss">5. How can I promote my RSS file?</a><br /> + <a href="#morerss">6. Where can I find more information about RSS?</a><br /> + </td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"><br /> + <a name="whatishs">1. What is headline syndication?</a> + </td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + Websites that publish new content regularly usually provide a list of news headline style links to their latest content. 
In addition to displaying these headlines on their own websites, it is very common for publishers to make them available for syndication, so that other websites or applications can also include their headlines.<br /><br /> + Headline syndication does not deal with the full text of articles, it is simply about syndicating an automatically updating list of headlines, with each headline being a link to the item that it refers to on the publisher's website. + </td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"><br /> + <a name="rss">2. What is RSS?</a> + </td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + <a href="#top">top</a> + </td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + RSS is the name given to a simple and well-established XML format used to syndicate headlines. Once a website creates an RSS file they have created a means to allow others to syndicate their headlines.<br /><br /> + The first version of RSS (RSS 0.9) was released by <a href="http://www.netscape.com/">Netscape</a> in March 1999 as a format for adding news channels to their <a href="http://my.netscape.com/">My.Netscape.Com</a> portal. Then in July 1999 Netscape released RSS 0.91, incorporating most of the features of a format called &lt;scriptingNews&gt;, which was created by <a href="http://www.userland.com/">UserLand</a>. Shortly thereafter Netscape discontinued developing the RSS format, however UserLand persisted and RSS continued to grow in strength. In December 2000, the separate RSS-DEV Working Group released RSS 1.0 and Userland announced RSS 0.92. As of April 2001, Userland is now planning RSS 0.93. Although RSS is not clearly an acronym of anything, different people have called it Rich Site Summary, RDF Site Summary and Really Simple Syndication at different times.<br /><br /> + The lack of clarity in what RSS stands for or which version is the correct one to use can seem confusing to beginners.
However these issues don't need to be addressed by a website wanting to create an RSS file. RSS is a very well recognised format, in fact it is often referred to as the most successful XML format to date. Some websites have a preference for one version, others create more than one RSS file and support multiple versions and a recent survey suggests that the first two versions of RSS (0.9 and 0.91) are still by far the most popular. + </td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + <a href="#top">top</a> + </td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"><br /> + <a name="whyrss">3. Why syndicate your headlines with RSS?</a> + </td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + Syndicating headlines is an excellent and cost-effective way of driving traffic to, and increasing brand awareness of, any website that publishes new content regularly.<br /><br /> + Once a website produces an RSS file they are enabling others to syndicate their headlines, without any further work on their part.<br /><br /> + The main benefits of creating an RSS file:<br /> + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content">RSS content can be included in customisable online news portals that aggregate RSS headlines like <a href="http://my.userland.com/">My.Userland.Com</a>. + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content">Websites that display news headlines can use an RSS file to incorporate another website's headlines into their own. + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content">RSS content can be added to personal desktop news reading applications like <a href="http://www.headlineviewer.com/">Headline Viewer</a> or <a href="http://radio.userland.com/">Radio Userland</a>.
+ </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content">Email newsletter providers could allow users to subscribe to RSS channels. <a href="http://www.xml.com/">XML.com</a> and <a href="http://www.xmltree.com/">XMLTree.com</a> previously offered such a service called Newsboy. + </td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + One positive side effect of producing an RSS file is that it can also be used by headline aggregation services like <a href="http://www.moreover.com/">Moreover.com</a>, who power news portals, specialist news search engines, business intelligence services or provide newsfeeds to websites. Most such companies use crawler-based technologies to aggregate and do not insist upon content being available in RSS, however they do have some requirements which having an RSS file addresses, sparing the need for any work on the part of a website that already publishes its headlines in RSS. + </td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + <a href="#top">top</a> + </td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"><br /> + <a name="howrss">4. How can I create an RSS file?</a> + </td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + RSS is a simple XML format and anyone who has experience in a mark-up language like HTML or XML should find it very easy to create and maintain an RSS file by hand. + <br /><br />Many websites prefer to generate their RSS file using a programming language, which involves a little more work to begin with but means that maintenance is no longer an issue. 
+ </td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"><br /> + In this section: + </td> + </tr> + <tr> + <td width="620" class="content" colspan="2"> + <a href="#specifications">RSS Specifications</a><br /> + <a href="#validators">RSS Validators</a><br /> + <a href="#tutorials">RSS Tutorials - The Basics</a><br /> + <a href="#tutorialsgen">RSS Tutorials - Generating RSS</a><br /> + <a href="#examples">RSS Examples</a><br /> + <a href="#tools">RSS Tools & Utilities</a><br /> + </td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"><br /> + <a name="specifications">RSS Specifications:</a><br /> + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" valign="top"><strong>RSS 0.93</strong> (Planning stage, April 2001)<br /><a href="http://backend.userland.com/rss093">http://backend.userland.com/rss093</a> (Userland) + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" valign="top"><strong>RSS 0.92</strong> (December 2000)<br /><a href="http://backend.userland.com/rss092">http://backend.userland.com/rss092</a> (Userland)<br /> + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" valign="top"><strong>RSS 1.0</strong> (December 2000)<br /><a href="http://groups.yahoo.com/group/rss-dev/files/specification.html">http://groups.yahoo.com/group/rss-dev/files/specification.html</a> (RSS-DEV Working Group)<br /> + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" valign="top"><strong>RSS 0.91</strong> (July 1999)<br /><a href="http://backend.userland.com/rss091">http://backend.userland.com/rss091</a> (Userland)<br /> + <a href="http://www.purplepages.ie/RSS/netscape/rss0.91.html">http://www.purplepages.ie/RSS/netscape/rss0.91.html</a> (Netscape)<br /> + <a 
href="http://my.netscape.com/publish/formats/rss-spec-0.91.html">http://my.netscape.com/publish/formats/rss-spec-0.91.html</a> (Netscape, Revision 3)<br /> + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" valign="top"><strong>RSS 0.90</strong> (March 1999)<br /><a href="http://www.purplepages.ie/RSS/netscape/rss0.90.html">http://www.purplepages.ie/RSS/netscape/rss0.90.html</a> (Netscape)<br /> + </td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"><br /> + <a name="validators">RSS Validators</a>:<br /> + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content"><a href="http://aggregator.userland.com/validator">http://aggregator.userland.com/validator</a> (RSS 0.91, RSS 0.92)</td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content"><a href="http://www.bath.ac.uk/~ccslrd/rss_validator/1.0/">http://www.bath.ac.uk/~ccslrd/rss_validator/1.0/</a> (RSS 1.0)</td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content"><a href="http://www.bath.ac.uk/~ccslrd/rss_validator/">http://www.bath.ac.uk/~ccslrd/rss_validator/</a> (RSS 0.9)</td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + <strong><a name="tutorials">RSS Tutorials - The Basics:</a></strong> (See also <a href="#specifications">RSS Specifications</a>, <a href="#websites">Websites</a>)<br /> + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"> - <a href="http://www.oreillynet.com/pub/a/network/2000/08/25/magazine/rss_tut.html">A step-by-step guide to building an RSS 1.0 document from the O'Reilly Network.</a></td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content"
colspan="2"> - <a href="http://publishing.about.com/arts/publishing/library/blrss.htm">An easy to understand introduction to RSS 0.91 from About.com.</a></td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"> - <a href="http://webreference.com/xml/column13/index.html">A comprehensive guide to creating RSS 0.91 files from Webreference.</a></td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"><br /> + <a name="tutorialsgen">RSS Tutorials - Generating RSS</a>:<br /> + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content">Active Server Pages (ASP)<br /> + <a href="http://www.purplepages.ie/site/articles/article.asp?faq=6&fldAuto=76">An article explaining how RSS files can be generated using ASP.</a> + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content">Perl<br /> + <a href="http://www.webtechniques.com/archives/2000/02/eisenzopf/">Jonathan Eisenzopf explains how his XML::RSS module can be used to create an RSS file.</a> + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content">PHP<br /> + <a href="http://linux.gelrevision.nl/php/">phpChannel, a set of two PHP class files to write rss files.</a> + </td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"><br /> + <a name="tools">RSS Tools & Utilities</a>: + </td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + Aaron Swartz provides a useful online utility called <a href="http://logicerror.com/blogifyYourPage">BlogifyYourPage</a>, that makes it easy to produce an RSS 1.0 file for any page.<br /><br /> + The <a href="http://www.webreference.com/perl/tools/">RSS Channel Editor</a> is a simple Perl CGI script that makes it easy to maintain an RSS channel. 
It can be used online at Webreference and you can also download the source. + </td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + <a href="#top">top</a> + </td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"><br /> + <a name="examples">RSS Examples</a>: + </td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + <a href="http://newsfeeds.manilasites.com/">Newsfeeds</a> reviews sources of RSS files, good examples and ideas you can use in putting together your own feed.<br /><br /> + <a href="http://www.ourfavoritesongs.com/">OurFavoriteSongs.Com</a> is a source of popular syndicated files, the top picks of <a href="http://radio.userland.com/">Radio Userland</a> users. + </td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + <a name="promoterss"><strong>5. How can I promote my RSS file?</strong></a> + <br /><br />There are a couple of important places to register RSS files, firstly <a href="http://www.xmltree.com/">XMLTree.com</a>, a specialist directory of XML content, and secondly <a href="http://my.userland.com/">My.Userland.Com</a>. Once an RSS file has been included in these sources it is likely to be found by websites, online news portals or news reading applications seeking RSS content.<br /><br /> + Websites should also create an information page, about syndicating their headlines. 
This will make existing users aware that the website has an RSS file so they can add it to their news reading applications or even include it on their own websites.<br /><br /> + This information page will be indexed by regular search engines and can also be submitted to various niche directories: + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://www.4freecontent.com/">4FreeContent</a></td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://www.findsticky.com/">FindSticky</a></td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://www.freesticky.com/">FreeSticky</a></td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://newsfeeds.manilasites.com/">Newsfeeds</a></td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://www.purplepages.ie/site/content/">Purple Pages</a></td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://www.woodoggy.com/">WooDoggy</a></td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + Websites that are interested in having their headlines picked up by organisations that aggregate headline content may also wish to visit: + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://www.linkyournews.com/">LinkYourNews.com</a></td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a 
href="http://www.magportal.com/">MagPortal.com</a></td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://www.moreover.com/">Moreover.com</a></td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://www.newsnow.co.uk/">NewsNow.co.uk</a></td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://www.newsisfree.com/">NewsIsFree.com</a></td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"><br /> + <a name="morerss">6. Where can I find more information about RSS?</a><br /><br /> + <a name="websites">Websites</a> + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://www.oreillynet.com/rss/">O'Reilly DevCenter RSS</a> - Articles about RSS from the O'Reilly Network.</td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://blogspace.com/rss/">RSS Info</a> - News and information on the RSS format</td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://rsswhys.weblogger.com/">RSS Why?s</a> - A site that aims to objectively and concisely explore all the points surrounding the creation, maintenance, and history of RSS.</td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://www.webreference.com/authoring/languages/xml/rss/">WebReference RSS Articles</a> - A collection of RSS articles and resources from Webreference.</td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"><br /> + Discussion Lists + 
</td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://groups.yahoo.com/group/reallySimpleSyndication">ReallySimpleSyndication</a> - RSS 0.93.</td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://groups.yahoo.com/group/rss-dev">RSS-DEV</a> - RSS 1.0.</td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://groups.yahoo.com/group/syndication">Syndication</a> - XML syndication, mainly RSS 0.91.</td> + </tr> + <tr> + <td width="620" class="contentbold" colspan="2"><br /> + More RSS FAQs + </td> + </tr> + <tr> + <td width="15" class="content" align="center" valign="top">•</td> + <td width="605" class="content" colspan="2"><a href="http://www.voidstar.com/rssfaq">RSS FAQ</a> - A detailed RSS FAQ from Julian Bond, readers can also contribute.</td> + </tr> + <tr> + <td width="620" class="content" colspan="2"><br /> + <a href="#top">top</a> + </td> + </tr> + <tr> + <td width="620" class="contentsmall" colspan="2"><br /><br /><a href="http://www.rssfaq.com/">RSSFAQ</a> Copyright © 2001 Members of the Syndication, RSS-DEV and ReallySimpleSyndication Groups.</td> + </tr> + <tr> + <td width="620" class="contentsmall" colspan="2">You may freely copy and distribute this document. 
Please give acknowledgements if you do.</td> + </tr> + <tr> + <td width="620" class="contentsmall" colspan="2">Last Updated: 24-August-2001 <a href="mailto:alis@purplepages.ie">Alis Marsden</a>.</td> + </tr> +</table> +</body> +</html> diff --git a/akregator/src/librss/testlibrss.cpp b/akregator/src/librss/testlibrss.cpp new file mode 100644 index 000000000..5d98bba46 --- /dev/null +++ b/akregator/src/librss/testlibrss.cpp @@ -0,0 +1,75 @@ +#include "testlibrss.h" + +#include "image.h" + +#include <kaboutdata.h> +#include <kcmdlineargs.h> +#include <kapplication.h> +#include <kdebug.h> + +using namespace RSS; + +static const KCmdLineOptions options[] = +{ + { "+url", I18N_NOOP("URL of feed"), 0 }, + KCmdLineLastOption +}; + + +void Tester::test( const QString &url ) +{ + Loader *loader = Loader::create(); + connect( loader, SIGNAL( loadingComplete( Loader *, Document, Status ) ), + this, SLOT( slotLoadingComplete( Loader *, Document, Status ) ) ); + loader->loadFrom( url, new FileRetriever ); +} + +void Tester::slotLoadingComplete( Loader *loader, Document doc, Status status ) +{ + if ( status == Success ) + { + kdDebug() << "Successfully retrieved '" << doc.title() << "'" << endl; + kdDebug() << doc.description() << endl; + + if ( doc.image() ) { + kdDebug() << "Image: "; + kdDebug() << " Title: " << doc.image()->title() << endl; + kdDebug() << " URL: " << doc.image()->url() << endl; + kdDebug() << " Link: " << doc.image()->link() << endl; + } + + kdDebug() << "Articles:" << endl; + + Article::List list = doc.articles(); + Article::List::ConstIterator it; + Article::List::ConstIterator en=list.end(); + for (it = list.begin(); it != en; ++it) + { + kdDebug() << "\tTitle: " << (*it).title() << endl; + kdDebug() << "\tText: " << (*it).description() << endl; + } + } + + if ( status != Success ) + kdDebug() << "ERROR " << loader->errorCode() << endl; + + kapp->quit(); +} + +int main( int argc, char **argv ) +{ + KAboutData aboutData( "testlibrss", "testlibrss", "0.1" 
/**
 * Helper object for the testlibrss check program: it starts loading a feed
 * and receives the loader's completion signal.
 */
class Tester : public QObject
{
    Q_OBJECT
    public:
        /**
         * Starts loading the feed at the given location.
         * @param url Location of the feed to load.
         */
        void test( const QString &url );

    private slots:
        /**
         * Receives the result of the load started by test().
         * @param loader The loader that finished.
         * @param doc The parsed document.
         * @param status Whether retrieving and parsing succeeded.
         */
        void slotLoadingComplete( Loader *loader, Document doc, Status status );
};
+ */ +#include "textinput.h" +#include "tools_p.h" + +#include <kurl.h> + +#include <qdom.h> + +using namespace RSS; + +struct TextInput::Private : public Shared +{ + QString title; + QString description; + QString name; + KURL link; +}; + +TextInput::TextInput() : d(new Private) +{ +} + +TextInput::TextInput(const TextInput &other) : d(0) +{ + *this = other; +} + +TextInput::TextInput(const QDomNode &node) : d(new Private) +{ + QString elemText; + + if (!(elemText = extractNode(node, QString::fromLatin1("title"))).isNull()) + d->title = elemText; + if (!(elemText = extractNode(node, QString::fromLatin1("description"))).isNull()) + d->description = elemText; + if (!(elemText = extractNode(node, QString::fromLatin1("name")))) + d->name = elemText; + if (!(elemText = extractNode(node, QString::fromLatin1("link"))).isNull()) + d->link = elemText; +} + +TextInput::~TextInput() +{ + if (d->deref()) + delete d; +} + +QString TextInput::title() const +{ + return d->title; +} + +QString TextInput::description() const +{ + return d->description; +} + +QString TextInput::name() const +{ + return d->name; +} + +const KURL &TextInput::link() const +{ + return d->link; +} + +TextInput &TextInput::operator=(const TextInput &other) +{ + if (this != &other) { + other.d->ref(); + if (d && d->deref()) + delete d; + d = other.d; + } + return *this; +} + +bool TextInput::operator==(const TextInput &other) const +{ + return d->title == other.title() && + d->description == other.description() && + d->name == other.name() && + d->link == other.link(); +} + +// vim:noet:ts=4 diff --git a/akregator/src/librss/textinput.h b/akregator/src/librss/textinput.h new file mode 100644 index 000000000..dd13c424b --- /dev/null +++ b/akregator/src/librss/textinput.h @@ -0,0 +1,121 @@ +/* + * textinput.h + * + * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org> + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied 
/**
 * Represents a text input facility as stored in an RSS file for the purpose
 * of allowing users to submit queries back to the publisher's site. You
 * don't have to instantiate one of these yourself; the common way to access
 * instances is via Document::textInput().
 * @see Document::textInput()
 */
class TextInput
{
    public:
        /**
         * Default constructor.
         */
        TextInput();

        /**
         * Copy constructor.
         * @param other The TextInput object to copy.
         */
        TextInput(const TextInput &other);

        /**
         * Constructs a TextInput from a piece of RSS markup.
         * @param node A QDomNode which references the DOM leaf to be used
         * for constructing the TextInput.
         */
        TextInput(const QDomNode &node);

        /**
         * Assignment operator.
         * @param other The TextInput object to clone.
         * @return A reference to the cloned TextInput object.
         */
        TextInput &operator=(const TextInput &other);

        /**
         * Compares two text inputs. Two text inputs are considered
         * identical if their properties (title, description, link etc.)
         * are identical.
         * @param other The text input to compare with.
         * @return Whether the two text inputs are equal.
         */
        bool operator==(const TextInput &other) const;

        /**
         * Convenience method. Simply calls !operator==().
         * @param other The text input to compare with.
         * @return Whether the two text inputs are unequal.
         */
        bool operator!=(const TextInput &other) const { return !operator==(other); }

        /**
         * Destructor.
         */
        virtual ~TextInput();

        /**
         * RSS 0.90 and upwards
         * @return The title (often a label to be used for the input field)
         * of the text input, or QString::null if no title is available.
         */
        QString title() const;

        /**
         * RSS 0.90 and upwards
         * @return The description (usually used as a tooltip which appears
         * if the mouse hovers above the input field for a short time) of
         * the text input, or QString::null if no description is
         * available.
         */
        QString description() const;

        /**
         * RSS 0.90 and upwards
         * @return The name of the text input, or QString::null if no
         * name is available.
         */
        QString name() const;

        /**
         * RSS 0.90 and upwards
         * @return A link to which the contents of the input field should
         * be sent after the user specified them. This is often a CGI
         * program on a remote server which evaluates the entered
         * information. An empty KURL is returned in case no link is
         * available.
         * Note that the RSS 0.91 Specification dictates that URLs not
         * starting with "http://" or "ftp://" are considered invalid.
         */
        const KURL &link() const;

    private:
        // Opaque implementation data; the struct is only declared here.
        struct Private;
        Private *d;
};
So let's check if the date begins with YYYY -fo + if (s.stripWhiteSpace().left(4).toInt() < 1000) + return 0; // error + + // FIXME: imho this is done in KRFCDate::parseDateISO8601() automatically, so we could omit it? -fo + if (s.find('T') != -1) + return KRFCDate::parseDateISO8601(s); + else + return KRFCDate::parseDateISO8601(s + "T12:00:00"); +} + +QString childNodesAsXML(const QDomNode& parent) +{ + QDomNodeList list = parent.childNodes(); + QString str; + QTextStream ts( &str, IO_WriteOnly ); + for (uint i = 0; i < list.count(); ++i) + ts << list.item(i); + return str.stripWhiteSpace(); +} + +static QString plainTextToHtml(const QString& plainText) +{ + QString str(plainText); + str.replace("&", "&"); + str.replace("\"", """); + str.replace("<", "<"); + //str.replace(">", ">"); + str.replace("\n", "<br/>"); + return str; +} + +enum ContentFormat { Text, HTML, XML, Binary }; + +static ContentFormat mapTypeToFormat(const QString& modep, const QString& typep, const QString& src) +{ + QString mode = modep.isNull() ? 
"escaped" : modep; + QString type = typep; + + //"If neither the type attribute nor the src attribute is provided, + //Atom Processors MUST behave as though the type attribute were + //present with a value of "text"" + if (type.isNull() && src.isEmpty()) + type = QString::fromUtf8("text"); + + if (type == QString::fromUtf8("html") + || type == QString::fromUtf8("text/html")) + return HTML; + + if (type == QString::fromUtf8("text") + || (type.startsWith(QString::fromUtf8("text/"), false) + && !type.startsWith(QString::fromUtf8("text/xml"), false)) + ) + return Text; + + QStringList xmltypes; + xmltypes.append(QString::fromUtf8("xhtml")); + // XML media types as defined in RFC3023: + xmltypes.append(QString::fromUtf8("text/xml")); + xmltypes.append(QString::fromUtf8("application/xml")); + xmltypes.append(QString::fromUtf8("text/xml-external-parsed-entity")); + xmltypes.append(QString::fromUtf8("application/xml-external-parsed-entity")); + xmltypes.append(QString::fromUtf8("application/xml-dtd")); + + + if (xmltypes.contains(type) + || type.endsWith(QString::fromUtf8("+xml"), false) + || type.endsWith(QString::fromUtf8("/xml"), false)) + return XML; + + return Binary; +} + +static QString extractAtomContent(const QDomElement& e) +{ + ContentFormat format = mapTypeToFormat(e.attribute("mode"), + e.attribute("type"), + e.attribute("src")); + + switch (format) + { + case HTML: + { + const bool hasPre = e.text().contains( "<pre>", false ) || e.text().contains( "<pre ", false ); + return KCharsets::resolveEntities( hasPre ? 
e.text() : e.text().simplifyWhiteSpace() ); + } + case Text: + return plainTextToHtml(e.text().stripWhiteSpace()); + case XML: + return childNodesAsXML(e).simplifyWhiteSpace(); + case Binary: + default: + return QString(); + } + + return QString(); +} + +QString extractNode(const QDomNode &parent, const QString &elemName, bool isInlined) +{ + QDomNode node = parent.namedItem(elemName); + if (node.isNull()) + return QString::null; + + QDomElement e = node.toElement(); + QString result = e.text().stripWhiteSpace(); // let's assume plain text + + if (elemName == "content") // we have Atom here + { + result = extractAtomContent(e); + } + else // check for HTML; not necessary for Atom:content + { + bool hasPre = result.contains("<pre>", false) || result.contains("<pre ", false); + bool hasHtml = hasPre || result.contains("<"); // FIXME: test if we have html, should be more clever -> regexp + if(!isInlined && !hasHtml) // perform nl2br if not a inline elt and it has no html elts + result = result = result.replace(QChar('\n'), "<br />"); + if(!hasPre) // strip white spaces if no <pre> + result = result.simplifyWhiteSpace(); + } + + return result.isEmpty() ? 
/**
 * Extracts a plain-text title from the "title" child of @p parent.
 *
 * The element text has its entities resolved, everything matching the
 * pattern "<[^>]*>" removed, backslashes removed, entities resolved a
 * second time and whitespace collapsed.
 *
 * @param parent The node whose "title" child is read.
 * @return The cleaned-up title, or QString::null if there is no "title"
 * child or the result is empty.
 */
QString extractTitle(const QDomNode & parent)
{
    const QDomNode titleNode = parent.namedItem(QString::fromLatin1("title"));
    if (titleNode.isNull())
        return QString::null;

    // First pass: decode entities, then drop tag-like sequences and backslashes.
    QString decoded = KCharsets::resolveEntities(titleNode.toElement().text());
    decoded.replace(QRegExp("<[^>]*>"), "");
    decoded.remove("\\");

    // Second pass: decode what the first pass uncovered, then tidy whitespace.
    const QString title = KCharsets::resolveEntities(decoded).simplifyWhiteSpace();

    return title.isEmpty() ? QString::null : title;
}
QString() : email; +} + +QString parseItemAuthor(const QDomElement& element, Format format, Version version) +{ + QString name; + QString email; + + QDomElement dcCreator = element.namedItem("dc:creator").toElement(); + + if (!dcCreator.isNull()) + authorFromString(dcCreator.text(), name, email); + else if (format == AtomFeed) + { + QDomElement atomAuthor = element.namedItem("author").toElement(); + if (atomAuthor.isNull()) + atomAuthor = element.namedItem("atom:author").toElement(); + if (!atomAuthor.isNull()) + { + QDomElement atomName = atomAuthor.namedItem("name").toElement(); + if (atomName.isNull()) + atomName = atomAuthor.namedItem("atom:name").toElement(); + name = atomName.text().stripWhiteSpace(); + + QDomElement atomEmail = atomAuthor.namedItem("email").toElement(); + if (atomEmail.isNull()) + atomEmail = atomAuthor.namedItem("atom:email").toElement(); + email = atomEmail.text().stripWhiteSpace(); + } + } + else if (format == RSSFeed) + { + authorFromString(element.namedItem("author").toElement().text(), name, email); + } + + if (name.isNull()) + name = email; + + if (!email.isNull()) + return QString("<a href=\"mailto:%1\">%2</a>").arg(email).arg(name); + else + return name; +} + +} // namespace RSS + +// vim:noet:ts=4 diff --git a/akregator/src/librss/tools_p.h b/akregator/src/librss/tools_p.h new file mode 100644 index 000000000..a257da8a6 --- /dev/null +++ b/akregator/src/librss/tools_p.h @@ -0,0 +1,40 @@ +/* + * tools_p.h + * + * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org> + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the + * accompanying file 'COPYING'. 
/**
 * Plain reference counter.
 *
 * A freshly constructed object starts with a count of one. ref() adds a
 * reference; deref() drops one and reports whether that was the last.
 */
struct Shared
{
    /** Current number of references. */
    unsigned int count;

    Shared()
        : count(1)
    {
    }

    /** Registers one more reference. */
    void ref()
    {
        ++count;
    }

    /**
     * Drops one reference.
     * @return true if the count reached zero, false otherwise.
     */
    bool deref()
    {
        --count;
        return count == 0;
    }
};