summaryrefslogtreecommitdiffstats
path: root/fbreader/src/network/litres/LitResBooksFeedParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'fbreader/src/network/litres/LitResBooksFeedParser.cpp')
-rw-r--r--fbreader/src/network/litres/LitResBooksFeedParser.cpp433
1 files changed, 433 insertions, 0 deletions
diff --git a/fbreader/src/network/litres/LitResBooksFeedParser.cpp b/fbreader/src/network/litres/LitResBooksFeedParser.cpp
new file mode 100644
index 0000000..970a8eb
--- /dev/null
+++ b/fbreader/src/network/litres/LitResBooksFeedParser.cpp
@@ -0,0 +1,433 @@
+/*
+ * Copyright (C) 2009-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+
+#include <ZLStringUtil.h>
+#include <ZLUnicodeUtil.h>
+
+#include "LitResBooksFeedParser.h"
+#include "LitResBookItem.h"
+#include "LitResGenre.h"
+#include "LitResUtil.h"
+#include "../NetworkLink.h"
+
+static const std::string TAG_CATALOG = "catalit-fb2-books";
+static const std::string TAG_BOOK = "fb2-book";
+static const std::string TAG_TEXT_DESCRIPTION = "text_description";
+static const std::string TAG_HIDDEN = "hidden";
+static const std::string TAG_TITLE_INFO = "title-info";
+static const std::string TAG_GENRE = "genre";
+static const std::string TAG_AUTHOR = "author";
+static const std::string TAG_FIRST_NAME = "first-name";
+static const std::string TAG_MIDDLE_NAME = "middle-name";
+static const std::string TAG_LAST_NAME = "last-name";
+static const std::string TAG_AUTHOR_ID = "id";
+static const std::string TAG_BOOK_TITLE = "book-title";
+static const std::string TAG_ANNOTATION = "annotation";
+static const std::string TAG_DATE = "date";
+static const std::string TAG_SEQUENCE = "sequence";
+static const std::string TAG_LANGUAGE = "lang";
+
+std::string LitResBooksFeedParser::stringAttributeValue(const char **attributes, const char *name) {
+ if (attributes == 0) {
+ return std::string();
+ }
+ const char *value = attributeValue(attributes, name);
+ return value != 0 ? value : std::string();
+}
+
+LitResBooksFeedParser::LitResBooksFeedParser(const NetworkLink &link, NetworkItem::List &books, LitResBooksFeedItem::LoadingState *loadingState) :
+ myLink(link),
+ myBooks(books),
+ myIndex(0),
+ myLoadingState(loadingState) {
+ myState = START;
+}
+
+
+void LitResBooksFeedParser::startElementHandler(const char *tag, const char **attributes) {
+ processState(tag, false, attributes);
+ myState = getNextState(tag, false);
+ myBuffer.clear();
+}
+
+void LitResBooksFeedParser::endElementHandler(const char *tag) {
+ processState(tag, true, 0);
+ myState = getNextState(tag, true);
+ myBuffer.clear();
+}
+
+void LitResBooksFeedParser::characterDataHandler(const char *data, std::size_t len) {
+ myBuffer.append(data, len);
+}
+
+void LitResBooksFeedParser::processState(const std::string &tag, bool closed, const char **attributes) {
+ switch(myState) {
+ case START:
+ if (!closed && TAG_CATALOG == tag) {
+ if (myLoadingState) {
+ myLoadingState->AllPagesCount = ZLStringUtil::stringToInteger(stringAttributeValue(attributes, "pages"), 1);
+ }
+ }
+ break;
+ case CATALOG:
+ if (!closed && TAG_BOOK == tag) {
+ myBookId = stringAttributeValue(attributes, "hub_id");
+ myURLByType[NetworkItem::URL_COVER] =
+ stringAttributeValue(attributes, "cover_preview");
+ myURLByType[NetworkItem::URL_FULL_COVER] =
+ stringAttributeValue(attributes, "cover");
+
+ std::string url = stringAttributeValue(attributes, "url");
+ if (!url.empty()) {
+ myLink.rewriteUrl(url, true); // This code duplicates code in FBReader::openInBrowser and is not required
+ myURLByType[NetworkItem::URL_HTML_PAGE] = url;
+ }
+
+ //TODO check if buying book works right
+ std::string price = BuyBookReference::price(stringAttributeValue(attributes, "price"), "RUB");
+ myReferences.push_back(new BuyBookReference(
+ LitResUtil::generatePurchaseUrl(myLink, myBookId),
+ BookReference::FB2_ZIP,
+ BookReference::BUY,
+ price
+ ));
+
+ std::string hasTrial = stringAttributeValue(attributes, "has_trial");
+ if (!hasTrial.empty() && hasTrial != "0") {
+ myReferences.push_back(new BookReference(
+ LitResUtil::generateTrialUrl(myBookId),
+ BookReference::FB2_ZIP,
+ BookReference::DOWNLOAD_DEMO
+ ));
+ }
+
+ myReferences.push_back(new BookReference(
+ LitResUtil::generateDownloadUrl(myBookId),
+ BookReference::FB2_ZIP,
+ BookReference::DOWNLOAD_FULL_CONDITIONAL
+ ));
+ }
+ break;
+ case BOOK:
+ if (closed && TAG_BOOK == tag) {
+ myBooks.push_back(new LitResBookItem(
+ myLink,
+ myBookId,
+ myIndex++,
+ myTitle,
+ mySummary,
+ myLanguage,
+ myDate,
+ myAuthors,
+ myTags,
+ mySeriesTitle,
+ myIndexInSeries,
+ myURLByType,
+ myReferences,
+ myAuthorsIds
+ ));
+
+ myTitle.erase();
+ mySummary.erase();
+ myLanguage.erase();
+ myDate.erase();
+ mySeriesTitle.erase();
+ myIndexInSeries = 0;
+ myAuthors.clear();
+ myAuthorsIds.clear();
+ myTags.clear();
+ myURLByType.clear();
+ myReferences.clear();
+ }
+ break;
+ case BOOK_DESCRIPTION:
+ break;
+ case HIDDEN:
+ break;
+ case TITLE_INFO:
+ if (!closed) {
+ if (TAG_AUTHOR == tag) {
+ myAuthorFirstName.clear();
+ myAuthorMiddleName.clear();
+ myAuthorLastName.clear();
+ } else if (TAG_SEQUENCE == tag) {
+ mySeriesTitle = stringAttributeValue(attributes, "name");
+ if (!mySeriesTitle.empty()) {
+ const char *indexInSeries = attributeValue(attributes, "number");
+ myIndexInSeries = indexInSeries != 0 ? std::atoi(indexInSeries) : 0;
+ }
+ }
+ }
+ break;
+ case AUTHOR:
+ if (closed && TAG_AUTHOR == tag) {
+ NetworkBookItem::AuthorData data;
+ if (!myAuthorFirstName.empty()) {
+ data.DisplayName.append(myAuthorFirstName);
+ }
+ if (!myAuthorMiddleName.empty()) {
+ if (!data.DisplayName.empty()) {
+ data.DisplayName.append(" ");
+ }
+ data.DisplayName.append(myAuthorMiddleName);
+ }
+ if (!myAuthorLastName.empty()) {
+ if (!data.DisplayName.empty()) {
+ data.DisplayName.append(" ");
+ }
+ data.DisplayName.append(myAuthorLastName);
+ }
+ data.SortKey = myAuthorLastName;
+ myAuthors.push_back(data);
+ myAuthorsIds.push_back(myAuthorId);
+ }
+ break;
+ case FIRST_NAME:
+ if (closed && TAG_FIRST_NAME == tag) {
+ ZLUnicodeUtil::utf8Trim(myBuffer);
+ myAuthorFirstName = myBuffer;
+ }
+ break;
+ case MIDDLE_NAME:
+ if (closed && TAG_MIDDLE_NAME == tag) {
+ ZLUnicodeUtil::utf8Trim(myBuffer);
+ myAuthorMiddleName = myBuffer;
+ }
+ break;
+ case LAST_NAME:
+ if (closed && TAG_LAST_NAME == tag) {
+ ZLUnicodeUtil::utf8Trim(myBuffer);
+ myAuthorLastName = myBuffer;
+ }
+ break;
+ case AUTHOR_ID:
+ if (closed && TAG_AUTHOR_ID == tag) {
+ ZLUnicodeUtil::utf8Trim(myBuffer);
+ myAuthorId = myBuffer;
+ }
+ break;
+ case GENRE:
+ if (closed && TAG_GENRE == tag) {
+ ZLUnicodeUtil::utf8Trim(myBuffer);
+
+ const std::map<std::string,shared_ptr<LitResGenre> > &genresMap =
+ LitResGenreMap::Instance().genresMap();
+ const std::map<shared_ptr<LitResGenre>,std::string> &genresTitles =
+ LitResGenreMap::Instance().genresTitles();
+
+ std::map<std::string, shared_ptr<LitResGenre> >::const_iterator it = genresMap.find(myBuffer);
+ if (it != genresMap.end()) {
+ std::map<shared_ptr<LitResGenre>, std::string>::const_iterator jt = genresTitles.find(it->second);
+ if (jt != genresTitles.end()) {
+ myTags.push_back(jt->second);
+ }
+ }
+ }
+ break;
+ case BOOK_TITLE:
+ if (closed && TAG_BOOK_TITLE == tag) {
+ ZLUnicodeUtil::utf8Trim(myBuffer);
+ myTitle = myBuffer;
+ }
+ break;
+ case ANNOTATION:
+ if (!closed) {
+ ZLUnicodeUtil::utf8Trim(myBuffer);
+ if (!myBuffer.empty()) {
+ mySummary.append(myBuffer);
+ mySummary.append(" ");
+ }
+ } else {
+ ZLUnicodeUtil::utf8Trim(myBuffer);
+ mySummary.append(myBuffer);
+ int size = mySummary.size();
+ if (size > 0) {
+ if (TAG_ANNOTATION == tag) {
+ if (mySummary[size - 1] == '\n') {
+ mySummary.erase(size - 1);
+ }
+ } else if ("p" == tag) {
+ if (mySummary[size - 1] != '\n') {
+ mySummary.append("\n");
+ }
+ } else {
+ if (!myBuffer.empty() && mySummary[size - 1] != '\n') {
+ mySummary.append(" ");
+ }
+ }
+ }
+ }
+ break;
+ case DATE:
+ if (closed && TAG_DATE == tag) {
+ ZLUnicodeUtil::utf8Trim(myBuffer);
+ myDate = myBuffer;
+ }
+ break;
+ case LANGUAGE:
+ if (closed && TAG_LANGUAGE == tag) {
+ ZLUnicodeUtil::utf8Trim(myBuffer);
+ myLanguage = myBuffer;
+ }
+ break;
+ }
+}
+
+LitResBooksFeedParser::State LitResBooksFeedParser::getNextState(const std::string &tag, bool closed) {
+ switch(myState) {
+ case START:
+ if (!closed && TAG_CATALOG == tag) {
+ return CATALOG;
+ }
+ break;
+ case CATALOG:
+ if (!closed) {
+ if (TAG_BOOK == tag) {
+ return BOOK;
+ }
+ } else {
+ if (TAG_CATALOG == tag) {
+ return START;
+ }
+ }
+ break;
+ case BOOK:
+ if (!closed) {
+ if (TAG_TEXT_DESCRIPTION == tag) {
+ return BOOK_DESCRIPTION;
+ }
+ } else {
+ if (TAG_BOOK == tag) {
+ return CATALOG;
+ }
+ }
+ break;
+ case BOOK_DESCRIPTION:
+ if (!closed) {
+ if (TAG_HIDDEN == tag) {
+ return HIDDEN;
+ }
+ } else {
+ if (TAG_TEXT_DESCRIPTION == tag) {
+ return BOOK;
+ }
+ }
+ break;
+ case HIDDEN:
+ if (!closed) {
+ if (TAG_TITLE_INFO == tag) {
+ return TITLE_INFO;
+ }
+ } else {
+ if (TAG_HIDDEN == tag) {
+ return BOOK_DESCRIPTION;
+ }
+ }
+ break;
+ case TITLE_INFO:
+ if (!closed) {
+ if (TAG_GENRE == tag) {
+ return GENRE;
+ } else if (TAG_AUTHOR == tag) {
+ return AUTHOR;
+ } else if (TAG_BOOK_TITLE == tag) {
+ return BOOK_TITLE;
+ } else if (TAG_ANNOTATION == tag) {
+ return ANNOTATION;
+ } else if (TAG_DATE == tag) {
+ return DATE;
+ } else if (TAG_LANGUAGE == tag) {
+ return LANGUAGE;
+ } /*else if (TAG_SEQUENCE == tag) {
+ return SEQUENCE; // handled without state through attributes
+ }*/
+ } else {
+ if (TAG_TITLE_INFO == tag) {
+ return HIDDEN;
+ }
+ }
+ break;
+ case AUTHOR:
+ if (!closed) {
+ if (TAG_FIRST_NAME == tag) {
+ return FIRST_NAME;
+ } else if (TAG_MIDDLE_NAME == tag) {
+ return MIDDLE_NAME;
+ } else if (TAG_LAST_NAME == tag) {
+ return LAST_NAME;
+ } else if (TAG_AUTHOR_ID == tag) {
+ return AUTHOR_ID;
+ }
+ } else {
+ if (TAG_AUTHOR == tag) {
+ return TITLE_INFO;
+ }
+ }
+ break;
+ case FIRST_NAME:
+ if (closed && TAG_FIRST_NAME == tag) {
+ return AUTHOR;
+ }
+ break;
+ case MIDDLE_NAME:
+ if (closed && TAG_MIDDLE_NAME == tag) {
+ return AUTHOR;
+ }
+ break;
+ case LAST_NAME:
+ if (closed && TAG_LAST_NAME == tag) {
+ return AUTHOR;
+ }
+ break;
+ case AUTHOR_ID:
+ if (closed && TAG_AUTHOR_ID == tag) {
+ return AUTHOR;
+ }
+ break;
+ case GENRE:
+ if (closed && TAG_GENRE == tag) {
+ return TITLE_INFO;
+ }
+ break;
+ case BOOK_TITLE:
+ if (closed && TAG_BOOK_TITLE == tag) {
+ return TITLE_INFO;
+ }
+ break;
+ case ANNOTATION:
+ if (closed && TAG_ANNOTATION == tag) {
+ return TITLE_INFO;
+ }
+ break;
+ case DATE:
+ if (closed && TAG_DATE == tag) {
+ return TITLE_INFO;
+ }
+ break;
+ case LANGUAGE:
+ if (closed && TAG_LANGUAGE == tag) {
+ return TITLE_INFO;
+ }
+ break;
+ }
+ return myState;
+}
+