From 460c52653ab0dcca6f19a4f492ed2c5e4e963ab0 Mon Sep 17 00:00:00 2001 From: toma Date: Wed, 25 Nov 2009 17:56:58 +0000 Subject: Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features. BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdepim@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da --- akregator/src/librss/tools_p.cpp | 249 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 249 insertions(+) create mode 100644 akregator/src/librss/tools_p.cpp (limited to 'akregator/src/librss/tools_p.cpp') diff --git a/akregator/src/librss/tools_p.cpp b/akregator/src/librss/tools_p.cpp new file mode 100644 index 000000000..dec831814 --- /dev/null +++ b/akregator/src/librss/tools_p.cpp @@ -0,0 +1,249 @@ +/* + * tools_p.cpp + * + * Copyright (c) 2001, 2002, 2003 Frerich Raabe + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the + * accompanying file 'COPYING'. + */ +#include "tools_p.h" + +#include +#include +#include +#include + +namespace RSS { + +time_t parseISO8601Date(const QString &s) +{ + // do some sanity check: 26-12-2004T00:00+00:00 is parsed to epoch+1 in the KRFCDate, which is wrong. So let's check if the date begins with YYYY -fo + if (s.stripWhiteSpace().left(4).toInt() < 1000) + return 0; // error + + // FIXME: imho this is done in KRFCDate::parseDateISO8601() automatically, so we could omit it? 
-fo + if (s.find('T') != -1) + return KRFCDate::parseDateISO8601(s); + else + return KRFCDate::parseDateISO8601(s + "T12:00:00"); +} + +QString childNodesAsXML(const QDomNode& parent) +{ + QDomNodeList list = parent.childNodes(); + QString str; + QTextStream ts( &str, IO_WriteOnly ); + for (uint i = 0; i < list.count(); ++i) + ts << list.item(i); + return str.stripWhiteSpace(); +} + +static QString plainTextToHtml(const QString& plainText) +{ + QString str(plainText); + str.replace("&", "&"); + str.replace("\"", """); + str.replace("<", "<"); + //str.replace(">", ">"); + str.replace("\n", "
"); + return str; +} + +enum ContentFormat { Text, HTML, XML, Binary }; + +static ContentFormat mapTypeToFormat(const QString& modep, const QString& typep, const QString& src) +{ + QString mode = modep.isNull() ? "escaped" : modep; + QString type = typep; + + //"If neither the type attribute nor the src attribute is provided, + //Atom Processors MUST behave as though the type attribute were + //present with a value of "text"" + if (type.isNull() && src.isEmpty()) + type = QString::fromUtf8("text"); + + if (type == QString::fromUtf8("html") + || type == QString::fromUtf8("text/html")) + return HTML; + + if (type == QString::fromUtf8("text") + || (type.startsWith(QString::fromUtf8("text/"), false) + && !type.startsWith(QString::fromUtf8("text/xml"), false)) + ) + return Text; + + QStringList xmltypes; + xmltypes.append(QString::fromUtf8("xhtml")); + // XML media types as defined in RFC3023: + xmltypes.append(QString::fromUtf8("text/xml")); + xmltypes.append(QString::fromUtf8("application/xml")); + xmltypes.append(QString::fromUtf8("text/xml-external-parsed-entity")); + xmltypes.append(QString::fromUtf8("application/xml-external-parsed-entity")); + xmltypes.append(QString::fromUtf8("application/xml-dtd")); + + + if (xmltypes.contains(type) + || type.endsWith(QString::fromUtf8("+xml"), false) + || type.endsWith(QString::fromUtf8("/xml"), false)) + return XML; + + return Binary; +} + +static QString extractAtomContent(const QDomElement& e) +{ + ContentFormat format = mapTypeToFormat(e.attribute("mode"), + e.attribute("type"), + e.attribute("src")); + + switch (format) + { + case HTML: + { + const bool hasPre = e.text().contains( "
", false ) || e.text().contains( "
", false) || result.contains("
 regexp
+            if(!isInlined && !hasHtml)						// perform nl2br if not a inline elt and it has no html elts
+                    result = result = result.replace(QChar('\n'), "
"); + if(!hasPre) // strip white spaces if no
+                    result = result.simplifyWhiteSpace();
+        }
+        
+        return result.isEmpty() ? QString::null : result;
+}
+
+/**
+ * Returns the cleaned-up text of the "title" child of @p parent.
+ *
+ * The title text has its entities resolved, anything that looks like a
+ * markup tag ("<...>") and every backslash removed, its entities resolved
+ * a second time, and its whitespace simplified.
+ *
+ * @param parent node whose "title" child is looked up
+ * @return the cleaned title, or QString::null if there is no "title" child
+ *         or nothing is left after cleaning
+ */
+QString extractTitle(const QDomNode & parent)
+{
+    const QDomNode titleNode = parent.namedItem(QString::fromLatin1("title"));
+    if (titleNode.isNull())
+        return QString::null;
+
+    // first pass: resolve entities, then drop tags and backslashes
+    QString title = KCharsets::resolveEntities(titleNode.toElement().text());
+    title.replace(QRegExp("<[^>]*>"), "");
+    title.remove("\\");
+
+    // second pass: resolve entities again, then collapse whitespace
+    title = KCharsets::resolveEntities(title).simplifyWhiteSpace();
+
+    return title.isEmpty() ? QString::null : title;
+}
+
+/**
+ * Splits a free-form author string into a name and an e-mail address.
+ *
+ * The first substring that looks like a mail address (optionally wrapped
+ * in angle brackets) is stored in @p email and removed from the string;
+ * the simplified remainder becomes @p name. If the remainder starts with a
+ * parenthesized group, as in "(Foo M. Bar)", the content of that group is
+ * used as the name.
+ *
+ * @param strp  raw author string
+ * @param name  out: extracted name; a null QString if none was found
+ * @param email out: extracted address; a null QString if none was found
+ *
+ * If @p strp is empty or only whitespace, @p name and @p email are left
+ * untouched.
+ */
+static void authorFromString(const QString& strp, QString& name, QString& email)
+{
+    QString str = strp.stripWhiteSpace();
+    if (str.isEmpty())
+        return;
+
+    // look for something looking like a mail address ( "foo@bar.com",
+    // "<foo@bar.com>") and extract it
+
+    // optional '<', then local-part '@' domain captured as group 1, then
+    // optional '>'
+    QRegExp remail("<?([^@\\s<]+@[^>\\s]+)>?"); // FIXME: use "proper" regexp,
+       // search kmail source for it
+
+    int pos = remail.search(str);
+    if (pos != -1)
+    {
+        QString all = remail.cap(0);
+        email = remail.cap(1);
+        str.replace(all, ""); // remove mail address
+    }
+
+    // simplify the rest and use it as name
+
+    name = str.simplifyWhiteSpace();
+
+    // after removing the email, str might have
+    // the format "(Foo M. Bar)". We cut off
+    // parentheses if there are any. However, if
+    // str is of the format "Foo M. Bar (President)",
+    // we should not cut anything.
+
+    QRegExp rename("^\\(([^\\)]*)\\)");
+
+    pos = rename.search(name);
+
+    if (pos != -1)
+    {
+        name = rename.cap(1);
+    }
+
+    // normalize empty results to null strings
+    name = name.isEmpty() ? QString() : name;
+    email = email.isEmpty() ? QString() : email;
+}
+
+/**
+ * Determines the author of a feed item and formats it for display.
+ *
+ * Lookup order:
+ *  - a "dc:creator" child, if present, is parsed as a free-form author
+ *    string regardless of the feed format;
+ *  - otherwise, for Atom feeds, the "author" (or "atom:author") child is
+ *    read, taking its "name"/"atom:name" and "email"/"atom:email" children;
+ *  - otherwise, for RSS feeds, the text of the "author" child is parsed as
+ *    a free-form author string.
+ *
+ * @param element the item element to inspect
+ * @param format  feed format, selects the fallback lookup
+ * @param version feed version; not used by this function
+ * @return an HTML mailto link showing the name (or the address if no name
+ *         is known) when an address was found, otherwise the plain name,
+ *         which may be a null string
+ */
+QString parseItemAuthor(const QDomElement& element, Format format, Version version)
+{
+    QString name;
+    QString email;
+
+    QDomElement dcCreator = element.namedItem("dc:creator").toElement();
+
+    if (!dcCreator.isNull())
+         authorFromString(dcCreator.text(), name, email);
+    else if (format == AtomFeed)
+    {
+        QDomElement atomAuthor = element.namedItem("author").toElement();
+        if (atomAuthor.isNull())
+            atomAuthor = element.namedItem("atom:author").toElement();
+        if (!atomAuthor.isNull())
+        {
+            QDomElement atomName = atomAuthor.namedItem("name").toElement();
+            if (atomName.isNull())
+                atomName = atomAuthor.namedItem("atom:name").toElement();
+            name = atomName.text().stripWhiteSpace();
+
+            QDomElement atomEmail = atomAuthor.namedItem("email").toElement();
+            if (atomEmail.isNull())
+                atomEmail = atomAuthor.namedItem("atom:email").toElement();
+            email = atomEmail.text().stripWhiteSpace();
+        }
+    }
+    else if (format == RSSFeed)
+    {
+        authorFromString(element.namedItem("author").toElement().text(), name, email);
+    }
+
+    // isEmpty() rather than isNull(): the Atom branch can yield strings that
+    // are empty but not null, which must not produce an empty link text or
+    // an empty mailto target.
+    if (name.isEmpty())
+        name = email;
+
+    // NOTE: name and email are inserted into the markup unescaped.
+    if (!email.isEmpty())
+        return QString("<a href=\"mailto:%1\">%2</a>").arg(email).arg(name);
+    else
+        return name;
+}
+
+} // namespace RSS
+
+// vim:noet:ts=4
-- 
cgit v1.2.1