summaryrefslogtreecommitdiffstats
path: root/fbreader/src/formats/txt
diff options
context:
space:
mode:
author Michele Calgaro <michele.calgaro@yahoo.it> 2024-05-11 21:28:48 +0900
committer Michele Calgaro <michele.calgaro@yahoo.it> 2024-05-11 21:28:48 +0900
commit 2462d03f322261bd616721c2b2065c4004b36c9c (patch)
tree 239947a0737bb8386703a1497f12c09aebd3080a /fbreader/src/formats/txt
download tde-ebook-reader-2462d03f322261bd616721c2b2065c4004b36c9c.tar.gz
tde-ebook-reader-2462d03f322261bd616721c2b2065c4004b36c9c.zip
Initial import (as is) from Debian Snapshot's 'fbreader' source code (https://snapshot.debian.org/package/fbreader/0.99.4%2Bdfsg-6).
The Debian code is provided under GPL2 license. Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
Diffstat (limited to 'fbreader/src/formats/txt')
-rw-r--r-- fbreader/src/formats/txt/PlainTextFormat.cpp 253
-rw-r--r-- fbreader/src/formats/txt/PlainTextFormat.h 112
-rw-r--r-- fbreader/src/formats/txt/TxtBookReader.cpp 124
-rw-r--r-- fbreader/src/formats/txt/TxtBookReader.h 59
-rw-r--r-- fbreader/src/formats/txt/TxtPlugin.cpp 79
-rw-r--r-- fbreader/src/formats/txt/TxtPlugin.h 37
-rw-r--r-- fbreader/src/formats/txt/TxtReader.cpp 200
-rw-r--r-- fbreader/src/formats/txt/TxtReader.h 56
8 files changed, 920 insertions, 0 deletions
diff --git a/fbreader/src/formats/txt/PlainTextFormat.cpp b/fbreader/src/formats/txt/PlainTextFormat.cpp
new file mode 100644
index 0000000..7c9360f
--- /dev/null
+++ b/fbreader/src/formats/txt/PlainTextFormat.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cctype>
+#include <algorithm>
+
+#include <ZLOptions.h>
+#include <ZLOptionsDialog.h>
+#include <ZLOptionEntry.h>
+#include <ZLFile.h>
+
+#include "PlainTextFormat.h"
+
+#include "../../options/FBCategoryKey.h"
+
+// Option names passed to the ZL*Option members of PlainTextFormat (see its
+// constructor below); one name per stored setting.
+const std::string OPTION_Initialized = "Initialized";
+const std::string OPTION_BreakType = "BreakType";
+const std::string OPTION_IgnoredIndent = "IgnoredIndent";
+const std::string OPTION_EmptyLinesBeforeNewSection = "EmptyLinesBeforeNewSection";
+const std::string OPTION_CreateContentsTable = "CreateContentsTable";
+
+// Creates the five format options for the given file. Every option is built
+// from FBCategoryKey::BOOKS, file.path() and one of the OPTION_* names; the
+// remaining arguments are presumably the default value (and, for the range
+// options, the bounds) -- confirm against ZLOptions.h.
+PlainTextFormat::PlainTextFormat(const ZLFile &file) :
+ InitializedOption(FBCategoryKey::BOOKS, file.path(), OPTION_Initialized, false),
+ BreakTypeOption(FBCategoryKey::BOOKS, file.path(), OPTION_BreakType, 1),
+ IgnoredIndentOption(FBCategoryKey::BOOKS, file.path(), OPTION_IgnoredIndent, 1, 100, 1),
+ EmptyLinesBeforeNewSectionOption(FBCategoryKey::BOOKS, file.path(), OPTION_EmptyLinesBeforeNewSection, 1, 100, 1),
+ CreateContentsTableOption(FBCategoryKey::BOOKS, file.path(), OPTION_CreateContentsTable, false) {
+}
+
+/*
+ * Builds the text-format tab of the options dialog for `file`.
+ *
+ * If the format options of the file have not been initialized yet, they are
+ * first guessed from the file content by PlainTextFormatDetector. The tab
+ * always gets the break type combo box and the ignored indent spin box; the
+ * contents table check box and the empty lines spin box are added only when
+ * `showContentsEntry` is true.
+ *
+ * Both entry pointers start out as null so that
+ * myEmptyLinesBeforeNewSectionEntry never holds an indeterminate value when
+ * `showContentsEntry` is false.
+ */
+PlainTextInfoPage::PlainTextInfoPage(ZLOptionsDialog &dialog, const ZLFile &file, const ZLResourceKey &key, bool showContentsEntry) :
+	myFormat(file),
+	myIgnoredIndentEntry(0),
+	myEmptyLinesBeforeNewSectionEntry(0) {
+	if (!myFormat.initialized()) {
+		PlainTextFormatDetector detector;
+		shared_ptr<ZLInputStream> stream = file.inputStream();
+		if (!stream.isNull()) {
+			detector.detect(*stream, myFormat);
+		}
+	}
+
+	ZLDialogContent &tab = dialog.createTab(key);
+
+	BreakTypeOptionEntry *breakEntry = new BreakTypeOptionEntry(*this, myFormat.BreakTypeOption);
+	// The spin entry must exist before onValueSelected() is called below,
+	// because that callback dereferences myIgnoredIndentEntry.
+	myIgnoredIndentEntry = new ZLSimpleSpinOptionEntry(myFormat.IgnoredIndentOption, 1);
+	tab.addOption(ZLResourceKey("breakType"), breakEntry);
+	tab.addOption(ZLResourceKey("ignoreIndent"), myIgnoredIndentEntry);
+	breakEntry->onValueSelected(breakEntry->initialIndex());
+
+	if (showContentsEntry) {
+		CreateContentsTableOptionEntry *contentsTableEntry = new CreateContentsTableOptionEntry(*this, myFormat.CreateContentsTableOption);
+		// Same ordering constraint: onStateChanged() dereferences
+		// myEmptyLinesBeforeNewSectionEntry.
+		myEmptyLinesBeforeNewSectionEntry = new ZLSimpleSpinOptionEntry(myFormat.EmptyLinesBeforeNewSectionOption, 1);
+		tab.addOption(ZLResourceKey("buildTOC"), contentsTableEntry);
+		tab.addOption(ZLResourceKey("emptyLines"), myEmptyLinesBeforeNewSectionEntry);
+		contentsTableEntry->onStateChanged(contentsTableEntry->initialState());
+	}
+}
+
+// Empty destructor: the entries created in the constructor are not deleted
+// here (they were handed to the dialog tab via addOption()).
+PlainTextInfoPage::~PlainTextInfoPage() {
+}
+
+// Number of bytes PlainTextFormatDetector::detect() requests per stream read.
+const int BUFFER_SIZE = 4096;
+
+/*
+ * Reads the whole stream once, gathers simple per-line statistics and stores
+ * the guessed settings (ignored indent, paragraph break type, empty lines
+ * before a new section, contents table flag) in `format`, finally marking the
+ * format as initialized. Does nothing when the stream cannot be opened.
+ *
+ * The stream is opened here and closed again before the statistics are
+ * evaluated, in the same way TxtReader::readDocument() does, so the handle is
+ * not left open after detection.
+ */
+void PlainTextFormatDetector::detect(ZLInputStream &stream, PlainTextFormat &format) {
+	if (!stream.open()) {
+		return;
+	}
+
+	const unsigned int tableSize = 10;
+
+	unsigned int lineCounter = 0;
+	int emptyLineCounter = -1;
+	unsigned int stringsWithLengthLessThan81Counter = 0;
+	// stringIndentTable[i]: non-empty lines that start with i whitespace
+	// characters (the last slot also collects every larger indent).
+	unsigned int stringIndentTable[tableSize] = { 0 };
+	// emptyLinesTable[i]: non-empty lines preceded by i + 1 empty lines
+	// (the last slot also collects every longer run).
+	unsigned int emptyLinesTable[tableSize] = { 0 };
+	// Same as emptyLinesTable, restricted to lines whose counted length is
+	// below 51.
+	unsigned int emptyLinesBeforeShortStringTable[tableSize] = { 0 };
+
+	bool currentLineIsEmpty = true;
+	// Counts every byte of the line, including '\r' and the closing '\n'.
+	unsigned int currentLineLength = 0;
+	unsigned int currentLineIndent = 0;
+	// -1 means "no empty line seen since the last non-empty line".
+	int currentNumberOfEmptyLines = -1;
+
+	// Automatic storage: nothing to release on any exit path.
+	char buffer[BUFFER_SIZE];
+	int length;
+	do {
+		length = stream.read(buffer, BUFFER_SIZE);
+		const char *end = buffer + length;
+		for (const char *ptr = buffer; ptr != end; ++ptr) {
+			++currentLineLength;
+			if (*ptr == '\n') {
+				++lineCounter;
+				if (currentLineIsEmpty) {
+					++emptyLineCounter;
+					++currentNumberOfEmptyLines;
+				} else {
+					if (currentNumberOfEmptyLines >= 0) {
+						int index = std::min(currentNumberOfEmptyLines, (int)tableSize - 1);
+						emptyLinesTable[index]++;
+						if (currentLineLength < 51) {
+							emptyLinesBeforeShortStringTable[index]++;
+						}
+					}
+					currentNumberOfEmptyLines = -1;
+				}
+				if (currentLineLength < 81) {
+					++stringsWithLengthLessThan81Counter;
+				}
+				if (!currentLineIsEmpty) {
+					stringIndentTable[std::min(currentLineIndent, tableSize - 1)]++;
+				}
+
+				currentLineIsEmpty = true;
+				currentLineLength = 0;
+				currentLineIndent = 0;
+			} else if (*ptr == '\r') {
+				// Carriage returns neither start the line content nor
+				// count as indent.
+				continue;
+			} else if (std::isspace((unsigned char)*ptr)) {
+				if (currentLineIsEmpty) {
+					++currentLineIndent;
+				}
+			} else {
+				currentLineIsEmpty = false;
+			}
+		}
+	} while (length == BUFFER_SIZE);
+	stream.close();
+
+	unsigned int nonEmptyLineCounter = lineCounter - emptyLineCounter;
+
+	{
+		// Smallest indent such that more than 10% of the non-empty lines
+		// have that indent or less; the option is set one above it.
+		unsigned int indent = 0;
+		unsigned int lineWithIndent = 0;
+		for (; indent < tableSize; ++indent) {
+			lineWithIndent += stringIndentTable[indent];
+			if (lineWithIndent > 0.1 * nonEmptyLineCounter) {
+				break;
+			}
+		}
+		format.IgnoredIndentOption.setValue(indent + 1);
+	}
+
+	{
+		// Few lines below the 81 limit: treat every line as a paragraph.
+		// Otherwise break paragraphs at indented lines.
+		int breakType = 0;
+		breakType |= PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE;
+		if (stringsWithLengthLessThan81Counter < 0.3 * nonEmptyLineCounter) {
+			breakType |= PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE;
+		} else {
+			breakType |= PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT;
+		}
+		format.BreakTypeOption.setValue(breakType);
+	}
+
+	{
+		// Start from the table slot (>= 2) with the most short lines.
+		unsigned int max = 0;
+		unsigned index;
+		int emptyLinesBeforeNewSection = -1;
+		for (index = 2; index < tableSize; ++index) {
+			if (max < emptyLinesBeforeShortStringTable[index]) {
+				max = emptyLinesBeforeShortStringTable[index];
+				emptyLinesBeforeNewSection = index;
+			}
+		}
+		if (emptyLinesBeforeNewSection > 0) {
+			// Turn both tables into suffix sums ("this slot or above").
+			for (index = tableSize - 1; index > 0; --index) {
+				emptyLinesTable[index - 1] += emptyLinesTable[index];
+				emptyLinesBeforeShortStringTable[index - 1] += emptyLinesBeforeShortStringTable[index];
+			}
+			// First slot at or above the candidate where more than two
+			// lines qualify and short lines exceed 70% of all lines.
+			for (index = emptyLinesBeforeNewSection; index < tableSize; ++index) {
+				if ((emptyLinesBeforeShortStringTable[index] > 2) &&
+						(emptyLinesBeforeShortStringTable[index] > 0.7 * emptyLinesTable[index])) {
+					break;
+				}
+			}
+			emptyLinesBeforeNewSection = (index == tableSize) ? -1 : (int)index;
+		}
+		format.EmptyLinesBeforeNewSectionOption.setValue(emptyLinesBeforeNewSection);
+		format.CreateContentsTableOption.setValue(emptyLinesBeforeNewSection > 0);
+	}
+
+	format.InitializedOption.setValue(true);
+}
+
+// Stores references to the owning page and to the option edited by this entry.
+BreakTypeOptionEntry::BreakTypeOptionEntry(PlainTextInfoPage &page, ZLIntegerOption &breakTypeOption) : myPage(page), myBreakTypeOption(breakTypeOption) {
+}
+
+// Nothing to release: both members are references.
+BreakTypeOptionEntry::~BreakTypeOptionEntry() {
+}
+
+// Labels of the break type combo box; filled on the first call of
+// BreakTypeOptionEntry::values().
+static std::vector<std::string> BREAK_TYPE_VALUES_VECTOR;
+
+/*
+ * Maps the stored break type to the index of the matching combo box label
+ * (0 = "New Line", 1 = "Empty Line", 2 = "Line With Indent").
+ *
+ * Any value with the BREAK_PARAGRAPH_AT_NEW_LINE bit set maps to 0. This
+ * covers the combination NEW_LINE | EMPTY_LINE written by
+ * PlainTextFormatDetector::detect(), which an exact comparison against
+ * BREAK_PARAGRAPH_AT_NEW_LINE would miss and report as "Line With Indent".
+ * Every other value keeps its previous mapping: exactly EMPTY_LINE gives 1,
+ * anything else gives 2.
+ */
+int BreakTypeOptionEntry::initialIndex() const {
+	const int breakType = myBreakTypeOption.value();
+	if (breakType & PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE) {
+		return 0;
+	}
+	if (breakType == PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE) {
+		return 1;
+	}
+	return 2;
+}
+
+// Returns the label from values() selected by initialIndex().
+const std::string &BreakTypeOptionEntry::initialValue() const {
+ return values()[initialIndex()];
+}
+
+// Returns the three combo box labels, creating them on first use in the
+// file-level BREAK_TYPE_VALUES_VECTOR.
+const std::vector<std::string> &BreakTypeOptionEntry::values() const {
+	std::vector<std::string> &labels = BREAK_TYPE_VALUES_VECTOR;
+	if (!labels.empty()) {
+		return labels;
+	}
+	static const char *const NAMES[] = { "New Line", "Empty Line", "Line With Indent" };
+	for (unsigned int i = 0; i < sizeof(NAMES) / sizeof(NAMES[0]); ++i) {
+		labels.push_back(NAMES[i]);
+	}
+	return labels;
+}
+
+// Stores the break type that belongs to the accepted combo box label.
+// A label that matches none of the three known values leaves the option
+// untouched.
+void BreakTypeOptionEntry::onAccept(const std::string &value) {
+	const std::vector<std::string> &labels = values();
+	int breakType;
+	if (value == labels[0]) {
+		breakType = PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE;
+	} else if (value == labels[1]) {
+		breakType = PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE;
+	} else if (value == labels[2]) {
+		breakType = PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE | PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT;
+	} else {
+		return;
+	}
+	myBreakTypeOption.setValue(breakType);
+}
+
+// Shows the ignored indent spin box only while index 2 ("Line With Indent")
+// is selected. Requires myPage.myIgnoredIndentEntry to be set already.
+void BreakTypeOptionEntry::onValueSelected(int index) {
+ myPage.myIgnoredIndentEntry->setVisible(index == 2);
+}
+
+// Forwards the option to the ZLSimpleBooleanOptionEntry base and remembers the
+// owning page.
+CreateContentsTableOptionEntry::CreateContentsTableOptionEntry(PlainTextInfoPage &page, ZLBooleanOption &option) : ZLSimpleBooleanOptionEntry(option), myPage(page) {
+}
+
+// Nothing to release: the only own member is a reference.
+CreateContentsTableOptionEntry::~CreateContentsTableOptionEntry() {
+}
+
+// Shows the empty lines spin box only while the check box state is true.
+// Requires myPage.myEmptyLinesBeforeNewSectionEntry to be set already.
+void CreateContentsTableOptionEntry::onStateChanged(bool state) {
+ myPage.myEmptyLinesBeforeNewSectionEntry->setVisible(state);
+}
diff --git a/fbreader/src/formats/txt/PlainTextFormat.h b/fbreader/src/formats/txt/PlainTextFormat.h
new file mode 100644
index 0000000..59cc61f
--- /dev/null
+++ b/fbreader/src/formats/txt/PlainTextFormat.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PLAINTEXTFORMAT_H__
+#define __PLAINTEXTFORMAT_H__
+
+#include <ZLInputStream.h>
+#include <ZLOptions.h>
+#include <optionEntries/ZLSimpleOptionEntry.h>
+#include <ZLResource.h>
+
+#include "../FormatPlugin.h"
+
+// Plain-text formatting settings of one file, held in ZL*Option members.
+// The options themselves are private; PlainTextInfoPage and
+// PlainTextFormatDetector reach them as friends.
+class PlainTextFormat {
+
+public:
+ // Bit flags; the value returned by breakType() is a combination of them.
+ enum ParagraphBreakType {
+ BREAK_PARAGRAPH_AT_NEW_LINE = 1,
+ BREAK_PARAGRAPH_AT_EMPTY_LINE = 2,
+ BREAK_PARAGRAPH_AT_LINE_WITH_INDENT = 4,
+ };
+
+ PlainTextFormat(const ZLFile &file);
+ ~PlainTextFormat() {}
+
+ // Read-only accessors returning the current value of the matching option.
+ bool initialized() const { return InitializedOption.value(); }
+ int breakType() const { return BreakTypeOption.value(); }
+ int ignoredIndent() const { return IgnoredIndentOption.value(); }
+ int emptyLinesBeforeNewSection() const { return EmptyLinesBeforeNewSectionOption.value(); }
+ bool createContentsTable() const { return CreateContentsTableOption.value(); }
+
+private:
+ ZLBooleanOption InitializedOption;
+ ZLIntegerOption BreakTypeOption;
+ ZLIntegerRangeOption IgnoredIndentOption;
+ ZLIntegerRangeOption EmptyLinesBeforeNewSectionOption;
+ ZLBooleanOption CreateContentsTableOption;
+
+friend class PlainTextInfoPage;
+friend class PlainTextFormatDetector;
+};
+
+// Options dialog page that edits the PlainTextFormat of one file.
+class PlainTextInfoPage : public FormatInfoPage {
+
+public:
+ // showContentsEntry selects whether the contents table entries are added.
+ PlainTextInfoPage(ZLOptionsDialog &dialog, const ZLFile &file, const ZLResourceKey &key, bool showContentsEntry);
+ ~PlainTextInfoPage();
+
+private:
+ PlainTextFormat myFormat;
+
+ // Spin entries whose visibility is toggled by the two friend entry classes.
+ // myEmptyLinesBeforeNewSectionEntry is assigned only when the page was
+ // created with showContentsEntry == true.
+ ZLSimpleSpinOptionEntry *myIgnoredIndentEntry;
+ ZLSimpleSpinOptionEntry *myEmptyLinesBeforeNewSectionEntry;
+
+friend class BreakTypeOptionEntry;
+friend class CreateContentsTableOptionEntry;
+};
+
+// Stateless helper that guesses PlainTextFormat settings from file content.
+class PlainTextFormatDetector {
+
+public:
+ PlainTextFormatDetector() {}
+ ~PlainTextFormatDetector() {}
+
+ // Reads `stream` and writes the detected settings into `format`.
+ void detect(ZLInputStream &stream, PlainTextFormat &format);
+};
+
+// Combo box entry for the paragraph break type; it also toggles the ignored
+// indent entry of the owning page.
+class BreakTypeOptionEntry : public ZLComboOptionEntry {
+
+public:
+ BreakTypeOptionEntry(PlainTextInfoPage &page, ZLIntegerOption &breakTypeOption);
+ ~BreakTypeOptionEntry();
+
+ // Index into values() that corresponds to the stored option value.
+ int initialIndex() const;
+ const std::string &initialValue() const;
+ const std::vector<std::string> &values() const;
+ void onAccept(const std::string &value);
+ void onValueSelected(int index);
+
+private:
+ PlainTextInfoPage &myPage;
+ ZLIntegerOption &myBreakTypeOption;
+};
+
+// Check box entry for the contents table option; it also toggles the empty
+// lines entry of the owning page.
+class CreateContentsTableOptionEntry : public ZLSimpleBooleanOptionEntry {
+
+public:
+ CreateContentsTableOptionEntry(PlainTextInfoPage &page, ZLBooleanOption &option);
+ ~CreateContentsTableOptionEntry();
+ void onStateChanged(bool state);
+
+private:
+ PlainTextInfoPage &myPage;
+};
+
+#endif /* __PLAINTEXTFORMAT_H__ */
diff --git a/fbreader/src/formats/txt/TxtBookReader.cpp b/fbreader/src/formats/txt/TxtBookReader.cpp
new file mode 100644
index 0000000..c68ea2c
--- /dev/null
+++ b/fbreader/src/formats/txt/TxtBookReader.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cctype>
+
+#include "TxtBookReader.h"
+#include "../../bookmodel/BookModel.h"
+
+// Initializes both bases and keeps a reference to `format`. The counters and
+// flags are not set here; startDocumentHandler() assigns them.
+TxtBookReader::TxtBookReader(BookModel &model, const PlainTextFormat &format, const std::string &encoding) : TxtReader(encoding), BookReader(model), myFormat(format) {
+}
+
+// Ends the current paragraph and marks the line state as "empty".
+void TxtBookReader::internalEndParagraph() {
+ if (!myLastLineIsEmpty) {
+ // Fix by Hatred: reset to -1 instead of 0. With 0 the "line with indent"
+ // break mode was disturbed: the second line was printed with an indent,
+ // as if it started a new paragraph.
+ myLineFeedCounter = -1;
+ }
+ myLastLineIsEmpty = true;
+ endParagraph();
+}
+
+/*
+ * Receives one chunk of converted text.
+ *
+ * Leading whitespace is added to mySpaceCounter: a tab counts as
+ * ignoredIndent() + 1, every other whitespace character as 1. A chunk made
+ * of whitespace only is not added to the model. Otherwise, when the format
+ * breaks paragraphs at indented lines and this chunk starts a line whose
+ * indent exceeds ignoredIndent(), a new paragraph is started before the text
+ * is added. Always returns true.
+ */
+bool TxtBookReader::characterDataHandler(std::string &str) {
+	const std::string::size_type length = str.length();
+	std::string::size_type pos = 0;
+	while (pos != length && std::isspace((unsigned char)str[pos])) {
+		if (str[pos] == '\t') {
+			mySpaceCounter += myFormat.ignoredIndent() + 1; // TODO: implement single option in PlainTextFormat
+		} else {
+			++mySpaceCounter;
+		}
+		++pos;
+	}
+	if (pos == length) {
+		// Whitespace only: nothing to add.
+		return true;
+	}
+	myLastLineIsEmpty = false;
+
+	const bool breakAtIndent =
+		(myFormat.breakType() & PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT) != 0;
+	if (breakAtIndent && myNewLine && (mySpaceCounter > myFormat.ignoredIndent())) {
+		internalEndParagraph();
+		beginParagraph();
+	}
+	addData(str);
+	if (myInsideContentsParagraph) {
+		addContentsData(str);
+	}
+	myNewLine = false;
+	return true;
+}
+
+// Called once per line break. Updates the line counters, decides whether the
+// break ends a paragraph and, when the contents table is enabled, opens and
+// closes section title paragraphs. Always returns true.
+bool TxtBookReader::newLineHandler() {
+ // After a non-empty line the counter becomes 0 below; each following
+ // empty line raises it by one.
+ if (!myLastLineIsEmpty) {
+ myLineFeedCounter = -1;
+ }
+ myLastLineIsEmpty = true;
+ ++myLineFeedCounter;
+ myNewLine = true;
+ mySpaceCounter = 0;
+ // Break on every line, or on empty lines only, depending on the flags.
+ bool paragraphBreak =
+ (myFormat.breakType() & PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE) ||
+ ((myFormat.breakType() & PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE) && (myLineFeedCounter > 0));
+
+ if (myFormat.createContentsTable()) {
+ // Fix by Hatred: compare with emptyLinesBeforeNewSection() itself, not
+ // with emptyLinesBeforeNewSection() + 1. Otherwise the number entered in
+ // the settings dialog had to be one less than the real count of empty lines.
+ if (!myInsideContentsParagraph && (myLineFeedCounter == myFormat.emptyLinesBeforeNewSection())) {
+ // Enough empty lines: close the section and start a title paragraph.
+ myInsideContentsParagraph = true;
+ internalEndParagraph();
+ insertEndOfSectionParagraph();
+ beginContentsParagraph();
+ enterTitle();
+ pushKind(SECTION_TITLE);
+ beginParagraph();
+ paragraphBreak = false;
+ }
+ // Not an else-branch: this test also runs right after the block above.
+ if (myInsideContentsParagraph && (myLineFeedCounter == 1)) {
+ exitTitle();
+ endContentsParagraph();
+ popKind();
+ myInsideContentsParagraph = false;
+ paragraphBreak = true;
+ }
+ }
+
+ if (paragraphBreak) {
+ internalEndParagraph();
+ beginParagraph();
+ }
+ return true;
+}
+
+// Prepares the model for reading: selects the main text model, opens the
+// first REGULAR paragraph and resets every line-tracking member.
+void TxtBookReader::startDocumentHandler() {
+ setMainTextModel();
+ pushKind(REGULAR);
+ beginParagraph();
+ myLineFeedCounter = 0;
+ myInsideContentsParagraph = false;
+ enterTitle();
+ myLastLineIsEmpty = true;
+ myNewLine = true;
+ mySpaceCounter = 0;
+}
+
+// Closes the paragraph that is still open at the end of the document.
+void TxtBookReader::endDocumentHandler() {
+ internalEndParagraph();
+}
diff --git a/fbreader/src/formats/txt/TxtBookReader.h b/fbreader/src/formats/txt/TxtBookReader.h
new file mode 100644
index 0000000..e02ad2a
--- /dev/null
+++ b/fbreader/src/formats/txt/TxtBookReader.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __TXTBOOKREADER_H__
+#define __TXTBOOKREADER_H__
+
+#include <stack>
+
+#include "TxtReader.h"
+#include "PlainTextFormat.h"
+#include "../../bookmodel/BookReader.h"
+
+class BookModel;
+
+// Feeds the text events produced by TxtReader into a BookModel through
+// BookReader, splitting paragraphs and section titles according to a
+// PlainTextFormat.
+class TxtBookReader : public TxtReader, public BookReader {
+
+public:
+ TxtBookReader(BookModel &model, const PlainTextFormat &format, const std::string &encoding);
+ ~TxtBookReader();
+
+protected:
+ void startDocumentHandler();
+ void endDocumentHandler();
+
+ bool characterDataHandler(std::string &str);
+ bool newLineHandler();
+
+private:
+ void internalEndParagraph();
+
+private:
+ // Held by reference: the format object must outlive the reader.
+ const PlainTextFormat &myFormat;
+
+ // State below is assigned in startDocumentHandler(), not in the constructor.
+ int myLineFeedCounter; // line breaks seen since the last non-empty line
+ bool myInsideContentsParagraph; // true while a section title is being read
+ bool myLastLineIsEmpty; // no non-whitespace text since the last line break
+ bool myNewLine; // no text added yet on the current line
+ int mySpaceCounter; // weighted leading whitespace of the current line
+};
+
+// Nothing to release beyond what the base classes handle.
+inline TxtBookReader::~TxtBookReader() {}
+
+#endif /* __TXTBOOKREADER_H__ */
diff --git a/fbreader/src/formats/txt/TxtPlugin.cpp b/fbreader/src/formats/txt/TxtPlugin.cpp
new file mode 100644
index 0000000..b155c2f
--- /dev/null
+++ b/fbreader/src/formats/txt/TxtPlugin.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "TxtPlugin.h"
+#include "TxtBookReader.h"
+#include "PlainTextFormat.h"
+
+#include "../../bookmodel/BookModel.h"
+#include "../../library/Book.h"
+
+// The plugin owns no resources of its own.
+TxtPlugin::~TxtPlugin() {
+}
+
+// Always reports false for this plugin.
+bool TxtPlugin::providesMetaInfo() const {
+ return false;
+}
+
+// Accepts a file when file.extension() equals "txt" exactly.
+// NOTE(review): the comparison is case-sensitive; whether extension() is
+// already lower-cased is not visible here -- confirm in ZLFile.
+bool TxtPlugin::acceptsFile(const ZLFile &file) const {
+ return file.extension() == "txt";
+}
+
+// Runs encoding and language detection on the book file.
+// Returns false when the file has no input stream or when the book still has
+// an empty encoding after detection; true otherwise.
+bool TxtPlugin::readMetaInfo(Book &book) const {
+	shared_ptr<ZLInputStream> input = book.file().inputStream();
+	if (input.isNull()) {
+		return false;
+	}
+	detectEncodingAndLanguage(book, *input);
+	return !book.encoding().empty();
+}
+
+// No-op: ignores the book and reports success.
+bool TxtPlugin::readLanguageAndEncoding(Book &book) const {
+ (void)book;
+ return true;
+}
+
+// Fills `model` from the book's text file.
+// Returns false only when the file provides no input stream. Format settings
+// that are not initialized yet are detected from the file content first.
+bool TxtPlugin::readModel(BookModel &model) const {
+	const Book &book = *model.book();
+	const ZLFile &file = book.file();
+	shared_ptr<ZLInputStream> input = file.inputStream();
+	if (input.isNull()) {
+		return false;
+	}
+
+	PlainTextFormat format(file);
+	const bool needsDetection = !format.initialized();
+	if (needsDetection) {
+		PlainTextFormatDetector().detect(*input, format);
+	}
+
+	TxtBookReader reader(model, format, book.encoding());
+	reader.readDocument(*input);
+	return true;
+}
+
+// Creates the "Text" options page with the contents table entries enabled.
+// The page is allocated with new and returned as a raw pointer.
+FormatInfoPage *TxtPlugin::createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file) {
+ return new PlainTextInfoPage(dialog, file, ZLResourceKey("Text"), true);
+}
diff --git a/fbreader/src/formats/txt/TxtPlugin.h b/fbreader/src/formats/txt/TxtPlugin.h
new file mode 100644
index 0000000..e3e6e50
--- /dev/null
+++ b/fbreader/src/formats/txt/TxtPlugin.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __TXTPLUGIN_H__
+#define __TXTPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+// Format plugin for plain-text (.txt) books.
+class TxtPlugin : public FormatPlugin {
+
+public:
+ ~TxtPlugin();
+ bool providesMetaInfo() const;
+ bool acceptsFile(const ZLFile &file) const;
+ bool readMetaInfo(Book &book) const;
+ bool readLanguageAndEncoding(Book &book) const;
+ bool readModel(BookModel &model) const;
+ // Returns a newly allocated page.
+ FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file);
+};
+
+#endif /* __TXTPLUGIN_H__ */
diff --git a/fbreader/src/formats/txt/TxtReader.cpp b/fbreader/src/formats/txt/TxtReader.cpp
new file mode 100644
index 0000000..d2f5659
--- /dev/null
+++ b/fbreader/src/formats/txt/TxtReader.cpp
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cctype>
+
+#include <ZLInputStream.h>
+
+#include "TxtReader.h"
+
+/*
+ * Byte-oriented reading loop used by TxtReader; also the base class of the
+ * UTF-16 variants.
+ *
+ * TxtReader keeps its core in a shared_ptr<TxtReaderCore> and assigns derived
+ * objects to it, so instances are destroyed through the base type. The
+ * destructor is therefore virtual.
+ */
+class TxtReaderCore {
+
+public:
+	TxtReaderCore(TxtReader &reader);
+	virtual ~TxtReaderCore() {}
+	// Reads `stream` to its end and reports text and line breaks to the reader.
+	virtual void readDocument(ZLInputStream &stream);
+
+protected:
+	TxtReader &myReader;
+};
+
+// Reading loop for UTF-16 input that works on 2-byte units; the byte order is
+// supplied by the subclasses through getAscii()/setAscii().
+class TxtReaderCoreUtf16 : public TxtReaderCore {
+
+public:
+ TxtReaderCoreUtf16(TxtReader &reader);
+ void readDocument(ZLInputStream &stream);
+
+protected:
+ // Returns the unit at ptr as a single char, or '\0' if its other byte is
+ // not zero.
+ virtual char getAscii(const char *ptr) = 0;
+ // Overwrites one byte of the unit at ptr with `ascii`.
+ virtual void setAscii(char *ptr, char ascii) = 0;
+};
+
+// UTF-16 core that treats the first byte of each unit as the character byte.
+class TxtReaderCoreUtf16LE : public TxtReaderCoreUtf16 {
+
+public:
+ TxtReaderCoreUtf16LE(TxtReader &reader);
+
+protected:
+ char getAscii(const char *ptr);
+ void setAscii(char *ptr, char ascii);
+};
+
+// UTF-16 core that treats the second byte of each unit as the character byte.
+class TxtReaderCoreUtf16BE : public TxtReaderCoreUtf16 {
+
+public:
+ TxtReaderCoreUtf16BE(TxtReader &reader);
+
+protected:
+ char getAscii(const char *ptr);
+ void setAscii(char *ptr, char ascii);
+};
+
+// Picks the reading core that matches `encoding`: the little-endian UTF-16
+// core for ZLEncodingConverter::UTF16, the big-endian one for
+// ZLEncodingConverter::UTF16BE, and the byte-oriented core for anything else.
+TxtReader::TxtReader(const std::string &encoding) : EncodedTextReader(encoding) {
+	TxtReaderCore *core = 0;
+	if (encoding == ZLEncodingConverter::UTF16) {
+		core = new TxtReaderCoreUtf16LE(*this);
+	} else if (encoding == ZLEncodingConverter::UTF16BE) {
+		core = new TxtReaderCoreUtf16BE(*this);
+	} else {
+		core = new TxtReaderCore(*this);
+	}
+	myCore = core;
+}
+
+// Empty: myCore is a shared_ptr and is released by its own destructor.
+TxtReader::~TxtReader() {
+}
+
+// Opens the stream, lets the selected core read it between the start and end
+// document callbacks, and closes the stream again. Returns silently, without
+// calling any handler, when the stream cannot be opened.
+void TxtReader::readDocument(ZLInputStream &stream) {
+ if (!stream.open()) {
+ return;
+ }
+ startDocumentHandler();
+ myCore->readDocument(stream);
+ endDocumentHandler();
+ stream.close();
+}
+
+// Keeps a reference to the reader that receives the callbacks.
+TxtReaderCore::TxtReaderCore(TxtReader &reader) : myReader(reader) {
+}
+
+// Forwards the reader to the base class.
+TxtReaderCoreUtf16::TxtReaderCoreUtf16(TxtReader &reader) : TxtReaderCore(reader) {
+}
+
+/*
+ * Reads the stream in chunks of BUFSIZE bytes and splits it at line breaks.
+ *
+ * Text is passed through myReader.myConverter and then handed to
+ * characterDataHandler(); the break character is part of the text of the
+ * line it ends. newLineHandler() is called once per "\n", "\r" or "\r\n".
+ * Other ASCII whitespace except tab is replaced by a space before
+ * conversion.
+ *
+ * A "\r\n" pair that is split across two reads is still reported as a single
+ * line break: the '\r' is reported when seen, and a '\n' that opens the next
+ * chunk is then skipped.
+ */
+void TxtReaderCore::readDocument(ZLInputStream &stream) {
+	const std::size_t BUFSIZE = 2048;
+	// Automatic storage: nothing to release if a handler or the converter throws.
+	char buffer[BUFSIZE];
+	std::string str;
+	std::size_t length;
+	// True when the previous chunk ended with a '\r' that has already been
+	// reported as a line break.
+	bool pendingCarriageReturn = false;
+	do {
+		length = stream.read(buffer, BUFSIZE);
+		char *start = buffer;
+		const char *end = buffer + length;
+		if (pendingCarriageReturn && start != end && *start == '\n') {
+			// Second half of a "\r\n" split by the chunk boundary.
+			++start;
+		}
+		pendingCarriageReturn = false;
+		for (char *ptr = start; ptr != end; ++ptr) {
+			if (*ptr == '\n' || *ptr == '\r') {
+				bool skipNewLine = false;
+				if (*ptr == '\r') {
+					if ((ptr + 1) == end) {
+						pendingCarriageReturn = true;
+					} else if (*(ptr + 1) == '\n') {
+						skipNewLine = true;
+						*ptr = '\n';
+					}
+				}
+				if (start != ptr) {
+					str.erase();
+					myReader.myConverter->convert(str, start, ptr + 1);
+					myReader.characterDataHandler(str);
+				}
+				if (skipNewLine) {
+					++ptr;
+				}
+				start = ptr + 1;
+				myReader.newLineHandler();
+			} else if (((*ptr) & 0x80) == 0 && std::isspace((unsigned char)*ptr)) {
+				if (*ptr != '\t') {
+					*ptr = ' ';
+				}
+			}
+		}
+		if (start != end) {
+			// Text after the last line break of this chunk.
+			str.erase();
+			myReader.myConverter->convert(str, start, end);
+			myReader.characterDataHandler(str);
+		}
+	} while (length == BUFSIZE);
+}
+
+/*
+ * UTF-16 counterpart of TxtReaderCore::readDocument(): same splitting rules,
+ * applied to 2-byte units whose character byte is accessed through
+ * getAscii()/setAscii().
+ *
+ * Only complete units are scanned. If a read returns an odd number of bytes,
+ * the dangling last byte is not inspected; it is still passed to the
+ * converter with the remaining text of the chunk. This keeps getAscii() from
+ * reading past the data and keeps `start` from moving beyond `end`.
+ *
+ * The whitespace test looks at the character byte itself and casts it to
+ * unsigned char, so std::isspace() never receives a negative value.
+ *
+ * A "\r\n" pair that is split across two reads is reported as one line break.
+ */
+void TxtReaderCoreUtf16::readDocument(ZLInputStream &stream) {
+	const std::size_t BUFSIZE = 2048;
+	// Automatic storage: nothing to release if a handler or the converter throws.
+	char buffer[BUFSIZE];
+	std::string str;
+	std::size_t length;
+	// True when the previous chunk ended with a '\r' unit that has already
+	// been reported as a line break.
+	bool pendingCarriageReturn = false;
+	do {
+		length = stream.read(buffer, BUFSIZE);
+		char *start = buffer;
+		const char *end = buffer + length;
+		// End of the last complete 2-byte unit.
+		const char *scanEnd = buffer + (length - length % 2);
+		if (pendingCarriageReturn && start != scanEnd && getAscii(start) == '\n') {
+			// Second half of a "\r\n" split by the chunk boundary.
+			start += 2;
+		}
+		pendingCarriageReturn = false;
+		for (char *ptr = start; ptr < scanEnd; ptr += 2) {
+			const char chr = getAscii(ptr);
+			if (chr == '\n' || chr == '\r') {
+				bool skipNewLine = false;
+				if (chr == '\r') {
+					if (ptr + 2 == scanEnd) {
+						pendingCarriageReturn = true;
+					} else if (getAscii(ptr + 2) == '\n') {
+						skipNewLine = true;
+						setAscii(ptr, '\n');
+					}
+				}
+				if (start != ptr) {
+					str.erase();
+					myReader.myConverter->convert(str, start, ptr + 2);
+					myReader.characterDataHandler(str);
+				}
+				if (skipNewLine) {
+					ptr += 2;
+				}
+				start = ptr + 2;
+				myReader.newLineHandler();
+			} else if (chr != 0 && (chr & 0x80) == 0 && std::isspace((unsigned char)chr)) {
+				if (chr != '\t') {
+					setAscii(ptr, ' ');
+				}
+			}
+		}
+		if (start != end) {
+			// Text after the last line break of this chunk.
+			str.erase();
+			myReader.myConverter->convert(str, start, end);
+			myReader.characterDataHandler(str);
+		}
+	} while (length == BUFSIZE);
+}
+
+// Forwards the reader to the UTF-16 base class.
+TxtReaderCoreUtf16LE::TxtReaderCoreUtf16LE(TxtReader &reader) : TxtReaderCoreUtf16(reader) {
+}
+
+// Returns the first byte of the unit when the second byte is zero, '\0'
+// otherwise. Reads ptr[0] and ptr[1].
+char TxtReaderCoreUtf16LE::getAscii(const char *ptr) {
+ return *(ptr + 1) == '\0' ? *ptr : '\0';
+}
+
+// Writes `ascii` into the first byte of the unit; the second byte is untouched.
+void TxtReaderCoreUtf16LE::setAscii(char *ptr, char ascii) {
+ *ptr = ascii;
+}
+
+// Forwards the reader to the UTF-16 base class.
+TxtReaderCoreUtf16BE::TxtReaderCoreUtf16BE(TxtReader &reader) : TxtReaderCoreUtf16(reader) {
+}
+
+// Returns the second byte of the unit when the first byte is zero, '\0'
+// otherwise. Reads ptr[0] and ptr[1].
+char TxtReaderCoreUtf16BE::getAscii(const char *ptr) {
+ return *ptr == '\0' ? *(ptr + 1) : '\0';
+}
+
+// Writes `ascii` into the second byte of the unit; the first byte is untouched.
+void TxtReaderCoreUtf16BE::setAscii(char *ptr, char ascii) {
+ *(ptr + 1) = ascii;
+}
diff --git a/fbreader/src/formats/txt/TxtReader.h b/fbreader/src/formats/txt/TxtReader.h
new file mode 100644
index 0000000..518ba8e
--- /dev/null
+++ b/fbreader/src/formats/txt/TxtReader.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __TXTREADER_H__
+#define __TXTREADER_H__
+
+#include <string>
+
+#include <ZLEncodingConverter.h>
+
+#include "../EncodedTextReader.h"
+
+class ZLInputStream;
+class TxtReaderCore;
+
+// Abstract event-style reader for text files: readDocument() drives a
+// TxtReaderCore, which calls the pure virtual handlers below.
+class TxtReader : public EncodedTextReader {
+
+public:
+ void readDocument(ZLInputStream &stream);
+
+protected:
+ TxtReader(const std::string &encoding);
+ virtual ~TxtReader();
+
+protected:
+ // Called once before and once after the stream is read.
+ virtual void startDocumentHandler() = 0;
+ virtual void endDocumentHandler() = 0;
+
+ // Called with converted text and at every line break, respectively.
+ virtual bool characterDataHandler(std::string &str) = 0;
+ virtual bool newLineHandler() = 0;
+
+private:
+ // Reading strategy chosen in the constructor from the encoding.
+ shared_ptr<TxtReaderCore> myCore;
+
+// The cores call the protected handlers above, hence the friendship.
+friend class TxtReaderCore;
+friend class TxtReaderCoreUtf16;
+friend class TxtReaderCoreUtf16BE;
+};
+
+#endif /* __TXTREADER_H__ */