summaryrefslogtreecommitdiffstats
path: root/plugins/rssfeed/rss/loader.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/rssfeed/rss/loader.cpp')
-rw-r--r--plugins/rssfeed/rss/loader.cpp425
1 files changed, 425 insertions, 0 deletions
diff --git a/plugins/rssfeed/rss/loader.cpp b/plugins/rssfeed/rss/loader.cpp
new file mode 100644
index 0000000..9dfb50a
--- /dev/null
+++ b/plugins/rssfeed/rss/loader.cpp
@@ -0,0 +1,425 @@
+/*
+ * loader.cpp
+ *
+ * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org>
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the
+ * accompanying file 'COPYING'.
+ */
+#include "loader.h"
+#include "document.h"
+
+#include <kio/job.h>
+#include <kprocess.h>
+#include <kurl.h>
+#include <kdebug.h>
+
+#include <qdom.h>
+#include <qbuffer.h>
+#include <qregexp.h>
+#include <qstringlist.h>
+#include <qtimer.h>
+
+using namespace RSS;
+
+// Default construction; the body is intentionally empty.
+DataRetriever::DataRetriever() {}
+
+// Out-of-line destructor; there is nothing to release here.
+DataRetriever::~DataRetriever() {}
+
+/*
+ * Private data of FileRetriever: the buffer that collects downloaded bytes,
+ * the error code of the last retrieval, and the KIO job currently in use.
+ */
+struct FileRetriever::Private
+{
+    QBuffer *buffer;
+    int lastError;
+    KIO::Job *job;
+
+    Private() : buffer(NULL), lastError(0), job(NULL) {}
+
+    // Only the buffer is deleted here; the job pointer is left alone.
+    ~Private() { delete buffer; }
+};
+
+// Allocates the private data block.
+FileRetriever::FileRetriever() : d(new Private) {}
+
+// Releases the private data block (and with it any pending buffer).
+FileRetriever::~FileRetriever() { delete d; }
+
+// Class-wide switch read by retrieveData(); caching is enabled by default.
+bool FileRetriever::m_useCache = true;
+
+// Sets the class-wide cache switch used by subsequent retrievals.
+void FileRetriever::setUseCache(bool enabled) { m_useCache = enabled; }
+
+/*
+ * Starts an asynchronous KIO download of url. A call made while the buffer
+ * of a previous retrieval still exists is ignored.
+ */
+void FileRetriever::retrieveData(const KURL &url)
+{
+    if (d->buffer != NULL)
+        return;
+
+    QBuffer *sink = new QBuffer;
+    sink->open(IO_WriteOnly);
+    d->buffer = sink;
+
+    // URLs using the "feed" protocol are fetched over "http" instead.
+    KURL target(url);
+    const bool isFeedScheme = (target.protocol() == "feed");
+    if (isFeedScheme)
+        target.setProtocol("http");
+
+    // Second argument is negated m_useCache; third argument is false.
+    const bool bypassCache = !m_useCache;
+    d->job = KIO::get(target, bypassCache, false);
+
+    // slotTimeout() is invoked once, 90 seconds from now.
+    QTimer::singleShot(90 * 1000, this, SLOT(slotTimeout()));
+
+    KIO::Job *const job = d->job;
+    connect(job, SIGNAL(data(KIO::Job *, const QByteArray &)),
+            SLOT(slotData(KIO::Job *, const QByteArray &)));
+    connect(job, SIGNAL(result(KIO::Job *)), SLOT(slotResult(KIO::Job *)));
+    connect(job, SIGNAL(permanentRedirection(KIO::Job *, const KURL &, const KURL &)),
+            SLOT(slotPermanentRedirection(KIO::Job *, const KURL &, const KURL &)));
+}
+
+/*
+ * Invoked by the single-shot timer armed in retrieveData(). Aborts the
+ * running job, discards the partial data and reports a server timeout.
+ *
+ * The timer is never cancelled, so it also fires after a retrieval that has
+ * already finished. In that case no buffer exists any more and the stale
+ * timeout is ignored rather than aborting a finished job and emitting a
+ * spurious failure.
+ */
+void FileRetriever::slotTimeout()
+{
+    if (!d->buffer)
+        return;
+
+    abort();
+
+    delete d->buffer;
+    d->buffer = NULL;
+
+    d->lastError = KIO::ERR_SERVER_TIMEOUT;
+
+    // Emit last: a connected slot may delete this object.
+    emit dataRetrieved(QByteArray(), false);
+}
+
+int FileRetriever::errorCode() const
+{
+ return d->lastError;
+}
+
+// Appends each chunk delivered by the job's data() signal to the buffer.
+void FileRetriever::slotData(KIO::Job *, const QByteArray &data)
+{
+    const char *bytes = data.data();
+    const uint count = data.size();
+    d->buffer->writeBlock(bytes, count);
+}
+
+/*
+ * Called when the KIO job has finished. Hands the collected bytes to the
+ * listeners of dataRetrieved(); the success flag is true when the job
+ * reported no error.
+ */
+void FileRetriever::slotResult(KIO::Job *job)
+{
+    // Take a private copy of the bytes before the buffer is destroyed.
+    QByteArray data = d->buffer->buffer();
+    data.detach();
+
+    delete d->buffer;
+    d->buffer = NULL;
+
+    d->lastError = job->error();
+
+    // A KIO job deletes itself once result() has been delivered, so the
+    // stored pointer must be dropped; otherwise a later abort() would call
+    // kill() on a destroyed object.
+    d->job = NULL;
+
+    // Emit last: a connected slot may delete this object.
+    const bool ok = (d->lastError == 0);
+    emit dataRetrieved(data, ok);
+}
+
+// Re-emits the job's permanent redirection, passing on only the new URL.
+void FileRetriever::slotPermanentRedirection(KIO::Job *, const KURL &, const KURL &newUrl)
+{
+    const KURL &redirectTarget = newUrl;
+    emit permanentRedirection(redirectTarget);
+}
+
+// Kills the current job, if any, passing true to kill(), and forgets it.
+void FileRetriever::abort()
+{
+    if (d->job == NULL)
+        return;
+
+    d->job->kill(true);
+    d->job = NULL;
+}
+
+/*
+ * Private data of OutputRetriever: the child process, the buffer that
+ * collects its standard output, and the last error code.
+ */
+struct OutputRetriever::Private
+{
+    KShellProcess *process;
+    QBuffer *buffer;
+    int lastError;
+
+    Private() : process(NULL), buffer(NULL), lastError(0) {}
+
+    // Both the process and the buffer are deleted here.
+    ~Private()
+    {
+        delete process;
+        delete buffer;
+    }
+};
+
+// Allocates the private data block.
+OutputRetriever::OutputRetriever() : d(new Private) {}
+
+// Releases the private data block (process and buffer included).
+OutputRetriever::~OutputRetriever() { delete d; }
+
+/*
+ * Runs the path component of url as a shell command and collects what it
+ * writes to standard output. Completion is reported through slotExited().
+ */
+void OutputRetriever::retrieveData(const KURL &url)
+{
+    // A previous run has not finished yet; ignore this request.
+    const bool busy = (d->buffer != NULL) || (d->process != NULL);
+    if (busy)
+        return;
+
+    d->buffer = new QBuffer;
+    d->buffer->open(IO_WriteOnly);
+
+    KShellProcess *proc = new KShellProcess();
+    d->process = proc;
+
+    connect(proc, SIGNAL(processExited(KProcess *)),
+            SLOT(slotExited(KProcess *)));
+    connect(proc, SIGNAL(receivedStdout(KProcess *, char *, int)),
+            SLOT(slotOutput(KProcess *, char *, int)));
+
+    *proc << url.path();
+
+    // NOTE(review): the result of start() is ignored; presumably a failed
+    // start means slotExited() is never called - confirm and handle.
+    proc->start(KProcess::NotifyOnExit, KProcess::Stdout);
+}
+
+int OutputRetriever::errorCode() const
+{
+ return d->lastError;
+}
+
+// Appends a chunk of the child's standard output to the buffer.
+void OutputRetriever::slotOutput(KProcess *, char *data, int length)
+{
+    QBuffer *const sink = d->buffer;
+    sink->writeBlock(data, length);
+}
+
+/*
+ * Called when the child process has exited. Hands the collected output to
+ * the listeners of dataRetrieved(); the success flag is true only for a
+ * normal exit with status 0.
+ */
+void OutputRetriever::slotExited(KProcess *p)
+{
+    // Read everything needed from p up front: p is the process stored in
+    // d->process, which is deleted further down. Querying it after that
+    // point (as the emit used to do) is a use-after-free.
+    const bool exitedNormally = p->normalExit();
+    const int status = p->exitStatus();
+
+    if (!exitedNormally)
+        d->lastError = status;
+
+    // Take a private copy of the bytes before the buffer is destroyed.
+    QByteArray data = d->buffer->buffer();
+    data.detach();
+
+    delete d->buffer;
+    d->buffer = NULL;
+
+    delete d->process;
+    d->process = NULL;
+
+    // Emit last: a connected slot may delete this object.
+    emit dataRetrieved(data, exitedNormally && status == 0);
+}
+
+/*
+ * Private data of Loader: the retriever in use, the last error code, the
+ * feed URL found by discoverFeeds(), and the URL being loaded.
+ */
+struct Loader::Private
+{
+    DataRetriever *retriever;
+    int lastError;
+    KURL discoveredFeedURL;
+    KURL url;
+
+    Private() : retriever(NULL), lastError(0) {}
+
+    // The retriever is deleted together with the private data.
+    ~Private() { delete retriever; }
+};
+
+// Factory: returns a new heap-allocated Loader.
+Loader *Loader::create()
+{
+    Loader *const fresh = new Loader;
+    return fresh;
+}
+
+/*
+ * Factory: creates a Loader and connects its loadingComplete() signal to
+ * the given slot of object before returning it.
+ */
+Loader *Loader::create(QObject *object, const char *slot)
+{
+    Loader *const fresh = create();
+    connect(fresh, SIGNAL(loadingComplete(Loader *, Document, Status)),
+            object, slot);
+    return fresh;
+}
+
+// Allocates the private data block.
+Loader::Loader()
+    : d(new Private)
+{}
+
+// Releases the private data block (and any retriever it still holds).
+Loader::~Loader() { delete d; }
+
+/*
+ * Starts loading url through retriever. The retriever is stored in the
+ * private data and deleted once it reports completion. A call made while a
+ * retriever is already stored is ignored.
+ */
+void Loader::loadFrom(const KURL &url, DataRetriever *retriever)
+{
+    // NOTE(review): when busy, the retriever passed in is not deleted here -
+    // confirm who is expected to free it.
+    if (d->retriever)
+        return;
+
+    d->url = url;
+    d->retriever = retriever;
+
+    connect(retriever, SIGNAL(dataRetrieved(const QByteArray &, bool)),
+            this, SLOT(slotRetrieverDone(const QByteArray &, bool)));
+
+    retriever->retrieveData(url);
+}
+
+int Loader::errorCode() const
+{
+ return d->lastError;
+}
+
+/*
+ * Cancels the load: aborts and deletes the retriever if there is one, emits
+ * loadingComplete() with an empty document and status Aborted, and then
+ * deletes this Loader. The object must not be used after this call.
+ */
+void Loader::abort()
+{
+    DataRetriever *const active = d ? d->retriever : NULL;
+    if (active)
+    {
+        active->abort();
+        delete active;
+        d->retriever = NULL;
+    }
+
+    emit loadingComplete(this, QDomDocument(), Aborted);
+
+    delete this;
+}
+
+// Feed URL stored by discoverFeeds(); default-constructed if none was found.
+const KURL &Loader::discoveredFeedURL() const
+{
+    const KURL &found = d->discoveredFeedURL;
+    return found;
+}
+
+#include <kdebug.h>
+
+/*
+ * Called when the retriever has finished. Records its error code, deletes
+ * it, parses the data on success, emits loadingComplete() and finally
+ * deletes this Loader.
+ *
+ * Status is Success when a valid document was parsed, ParseError when the
+ * data is not a valid feed (feed discovery is attempted in that case), and
+ * RetrieveError when the retriever reported failure.
+ */
+void Loader::slotRetrieverDone(const QByteArray &data, bool success)
+{
+    d->lastError = d->retriever->errorCode();
+
+    delete d->retriever;
+    d->retriever = NULL;
+
+    Document rssDoc;
+    Status status = Success;
+
+    if (success) {
+        QDomDocument doc;
+
+        /* Some servers insert whitespace before the <?xml...?> declaration.
+         * QDom doesn't tolerate that (and it's right, that's invalid XML),
+         * so we strip that.
+         */
+        const char *charData = data.data();
+        int len = data.count();
+
+        while (len && QChar(*charData).isSpace()) {
+            --len;
+            ++charData;
+        }
+
+        // Skip a UTF-8 byte order mark (0xEF 0xBB 0xBF). All three bytes are
+        // compared so that data which merely starts with 0xEF does not lose
+        // its first three bytes.
+        if (len >= 3
+            && static_cast<unsigned char>(charData[0]) == 0xEF
+            && static_cast<unsigned char>(charData[1]) == 0xBB
+            && static_cast<unsigned char>(charData[2]) == 0xBF) {
+            len -= 3;
+            charData += 3;
+        }
+
+        // tmpData only borrows the bytes of data; the borrow must be undone
+        // with resetRawData() before tmpData goes out of scope.
+        QByteArray tmpData;
+        tmpData.setRawData(charData, len);
+
+        if (doc.setContent(tmpData))
+        {
+            rssDoc = Document(doc);
+            if (!rssDoc.isValid())
+            {
+                discoverFeeds(tmpData);
+                status = ParseError;
+            }
+        }
+        else
+        {
+            discoverFeeds(tmpData);
+            status = ParseError;
+        }
+
+        tmpData.resetRawData(charData, len);
+    } else
+        status = RetrieveError;
+
+    emit loadingComplete(this, rssDoc, status);
+
+    // The loader is single-use: it destroys itself after reporting.
+    delete this;
+}
+
+/*
+ * Searches data (treated as the text of a web page) for the address of a
+ * feed and stores it in d->discoveredFeedURL.
+ *
+ * First a REL=alternate / service.feed link with an HREF is looked for. If
+ * none matches, every <A HREF=...> target ending in .rdf, .rss or .xml is
+ * collected; the first one is used unless a later one points to the same
+ * host as the page. Relative addresses are resolved against the URL being
+ * loaded. When nothing is found, d->discoveredFeedURL is left unchanged.
+ */
+void Loader::discoverFeeds(const QByteArray &data)
+{
+    QString str = QString(data).simplifyWhiteSpace();
+    QString s2;
+
+    QRegExp rx( "(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[\\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)", false);
+    if (rx.search(str) != -1)
+        s2 = rx.cap(1);
+    else {
+        // does not support Atom/RSS autodiscovery.. try finding feeds by brute force....
+        QStringList feeds;
+        QString host = d->url.host();
+        rx.setPattern("(?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\\s]*)");
+
+        int pos = rx.search(str, 0);
+        while (pos >= 0) {
+            QString href = rx.cap(1);
+            if (href.endsWith(".rdf") || href.endsWith(".rss") || href.endsWith(".xml"))
+                feeds.append(href);
+            pos = rx.search(str, pos + rx.matchedLength());
+        }
+
+        // first() on an empty list is undefined, so only pick a candidate
+        // when at least one was collected; otherwise s2 stays null.
+        if (!feeds.isEmpty()) {
+            s2 = feeds.first();
+            KURL testURL;
+            // loop through, prefer feeds on same host
+            for ( QStringList::Iterator it = feeds.begin(); it != feeds.end(); ++it ) {
+                testURL = *it;
+                if (testURL.host() == host)
+                {
+                    s2 = *it;
+                    break;
+                }
+            }
+        }
+    }
+
+    // isEmpty() also rejects a matched but empty HREF, which would otherwise
+    // resolve to the URL of the page itself.
+    if (s2.isEmpty()) {
+        kdDebug() << "No feed found for a site" << endl;
+        return;
+    }
+
+    if (KURL::isRelativeURL(s2))
+    {
+        if (s2.startsWith("//"))
+        {
+            // Protocol-relative address: reuse the protocol of the page.
+            s2.prepend(d->url.protocol()+":");
+            d->discoveredFeedURL = s2;
+        }
+        else if (s2.startsWith("/"))
+        {
+            // Host-relative address: replace the whole path.
+            d->discoveredFeedURL = d->url;
+            d->discoveredFeedURL.setPath(s2);
+        }
+        else
+        {
+            // NOTE(review): addPath() appends to the page's complete path;
+            // presumably a page URL ending in a file name resolves wrongly
+            // here - confirm.
+            d->discoveredFeedURL = d->url;
+            d->discoveredFeedURL.addPath(s2);
+        }
+    }
+    else
+        d->discoveredFeedURL = s2;
+
+    d->discoveredFeedURL.cleanPath();
+}
+
+#include "loader.moc"
+// vim:noet:ts=4