diff options
author | Michele Calgaro <michele.calgaro@yahoo.it> | 2024-05-11 21:28:48 +0900 |
---|---|---|
committer | Michele Calgaro <michele.calgaro@yahoo.it> | 2024-05-11 21:28:48 +0900 |
commit | 2462d03f322261bd616721c2b2065c4004b36c9c (patch) | |
tree | 239947a0737bb8386703a1497f12c09aebd3080a /fbreader/src/formats/txt | |
download | tde-ebook-reader-2462d03f322261bd616721c2b2065c4004b36c9c.tar.gz tde-ebook-reader-2462d03f322261bd616721c2b2065c4004b36c9c.zip |
Initial import (as is) from Debian Snapshot's 'fbreader' source code (https://snapshot.debian.org/package/fbreader/0.99.4%2Bdfsg-6).
The Debian code is provided under GPL2 license.
Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
Diffstat (limited to 'fbreader/src/formats/txt')
-rw-r--r-- | fbreader/src/formats/txt/PlainTextFormat.cpp | 253 | ||||
-rw-r--r-- | fbreader/src/formats/txt/PlainTextFormat.h | 112 | ||||
-rw-r--r-- | fbreader/src/formats/txt/TxtBookReader.cpp | 124 | ||||
-rw-r--r-- | fbreader/src/formats/txt/TxtBookReader.h | 59 | ||||
-rw-r--r-- | fbreader/src/formats/txt/TxtPlugin.cpp | 79 | ||||
-rw-r--r-- | fbreader/src/formats/txt/TxtPlugin.h | 37 | ||||
-rw-r--r-- | fbreader/src/formats/txt/TxtReader.cpp | 200 | ||||
-rw-r--r-- | fbreader/src/formats/txt/TxtReader.h | 56 |
8 files changed, 920 insertions, 0 deletions
diff --git a/fbreader/src/formats/txt/PlainTextFormat.cpp b/fbreader/src/formats/txt/PlainTextFormat.cpp new file mode 100644 index 0000000..7c9360f --- /dev/null +++ b/fbreader/src/formats/txt/PlainTextFormat.cpp @@ -0,0 +1,253 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +#include <cctype> +#include <algorithm> + +#include <ZLOptions.h> +#include <ZLOptionsDialog.h> +#include <ZLOptionEntry.h> +#include <ZLFile.h> + +#include "PlainTextFormat.h" + +#include "../../options/FBCategoryKey.h" + +const std::string OPTION_Initialized = "Initialized"; +const std::string OPTION_BreakType = "BreakType"; +const std::string OPTION_IgnoredIndent = "IgnoredIndent"; +const std::string OPTION_EmptyLinesBeforeNewSection = "EmptyLinesBeforeNewSection"; +const std::string OPTION_CreateContentsTable = "CreateContentsTable"; + +PlainTextFormat::PlainTextFormat(const ZLFile &file) : + InitializedOption(FBCategoryKey::BOOKS, file.path(), OPTION_Initialized, false), + BreakTypeOption(FBCategoryKey::BOOKS, file.path(), OPTION_BreakType, 1), + IgnoredIndentOption(FBCategoryKey::BOOKS, file.path(), OPTION_IgnoredIndent, 1, 100, 1), + EmptyLinesBeforeNewSectionOption(FBCategoryKey::BOOKS, file.path(), OPTION_EmptyLinesBeforeNewSection, 1, 100, 1), + CreateContentsTableOption(FBCategoryKey::BOOKS, file.path(), OPTION_CreateContentsTable, false) { +} + +PlainTextInfoPage::PlainTextInfoPage(ZLOptionsDialog &dialog, const ZLFile &file, const ZLResourceKey &key, bool showContentsEntry) : myFormat(file) { + if (!myFormat.initialized()) { + PlainTextFormatDetector detector; + shared_ptr<ZLInputStream> stream = file.inputStream(); + if (!stream.isNull()) { + detector.detect(*stream, myFormat); + } + } + + ZLDialogContent &tab = dialog.createTab(key); + + BreakTypeOptionEntry *breakEntry = new BreakTypeOptionEntry(*this, myFormat.BreakTypeOption); + myIgnoredIndentEntry = new ZLSimpleSpinOptionEntry(myFormat.IgnoredIndentOption, 1); + tab.addOption(ZLResourceKey("breakType"), breakEntry); + tab.addOption(ZLResourceKey("ignoreIndent"), myIgnoredIndentEntry); + breakEntry->onValueSelected(breakEntry->initialIndex()); + + if (showContentsEntry) { + CreateContentsTableOptionEntry *contentsTableEntry = new CreateContentsTableOptionEntry(*this, 
myFormat.CreateContentsTableOption); + myEmptyLinesBeforeNewSectionEntry = new ZLSimpleSpinOptionEntry(myFormat.EmptyLinesBeforeNewSectionOption, 1); + tab.addOption(ZLResourceKey("buildTOC"), contentsTableEntry); + tab.addOption(ZLResourceKey("emptyLines"), myEmptyLinesBeforeNewSectionEntry); + contentsTableEntry->onStateChanged(contentsTableEntry->initialState()); + } +} + +PlainTextInfoPage::~PlainTextInfoPage() { +} + +const int BUFFER_SIZE = 4096; + +void PlainTextFormatDetector::detect(ZLInputStream &stream, PlainTextFormat &format) { + if (!stream.open()) { + return; + } + + const unsigned int tableSize = 10; + + unsigned int lineCounter = 0; + int emptyLineCounter = -1; + unsigned int stringsWithLengthLessThan81Counter = 0; + unsigned int stringIndentTable[tableSize] = { 0 }; + unsigned int emptyLinesTable[tableSize] = { 0 }; + unsigned int emptyLinesBeforeShortStringTable[tableSize] = { 0 }; + + bool currentLineIsEmpty = true; + unsigned int currentLineLength = 0; + unsigned int currentLineIndent = 0; + int currentNumberOfEmptyLines = -1; + + char *buffer = new char[BUFFER_SIZE]; + int length; + char previous = 0; + do { + length = stream.read(buffer, BUFFER_SIZE); + const char *end = buffer + length; + for (const char *ptr = buffer; ptr != end; ++ptr) { + ++currentLineLength; + if (*ptr == '\n') { + ++lineCounter; + if (currentLineIsEmpty) { + ++emptyLineCounter; + ++currentNumberOfEmptyLines; + } else { + if (currentNumberOfEmptyLines >= 0) { + int index = std::min(currentNumberOfEmptyLines, (int)tableSize - 1); + emptyLinesTable[index]++; + if (currentLineLength < 51) { + emptyLinesBeforeShortStringTable[index]++; + } + } + currentNumberOfEmptyLines = -1; + } + if (currentLineLength < 81) { + ++stringsWithLengthLessThan81Counter; + } + if (!currentLineIsEmpty) { + stringIndentTable[std::min(currentLineIndent, tableSize - 1)]++; + } + + currentLineIsEmpty = true; + currentLineLength = 0; + currentLineIndent = 0; + } else if (*ptr == '\r') { + continue; + 
} else if (std::isspace((unsigned char)*ptr)) { + if (currentLineIsEmpty) { + ++currentLineIndent; + } + } else { + currentLineIsEmpty = false; + } + previous = *ptr; + } + } while (length == BUFFER_SIZE); + delete[] buffer; + + unsigned int nonEmptyLineCounter = lineCounter - emptyLineCounter; + + { + unsigned int indent = 0; + unsigned int lineWithIndent = 0; + for (; indent < tableSize; ++indent) { + lineWithIndent += stringIndentTable[indent]; + if (lineWithIndent > 0.1 * nonEmptyLineCounter) { + break; + } + } + format.IgnoredIndentOption.setValue(indent + 1); + } + + { + int breakType = 0; + breakType |= PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE; + if (stringsWithLengthLessThan81Counter < 0.3 * nonEmptyLineCounter) { + breakType |= PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE; + } else { + breakType |= PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT; + } + format.BreakTypeOption.setValue(breakType); + } + + { + unsigned int max = 0; + unsigned index; + int emptyLinesBeforeNewSection = -1; + for (index = 2; index < tableSize; ++index) { + if (max < emptyLinesBeforeShortStringTable[index]) { + max = emptyLinesBeforeShortStringTable[index]; + emptyLinesBeforeNewSection = index; + } + } + if (emptyLinesBeforeNewSection > 0) { + for (index = tableSize - 1; index > 0; --index) { + emptyLinesTable[index - 1] += emptyLinesTable[index]; + emptyLinesBeforeShortStringTable[index - 1] += emptyLinesBeforeShortStringTable[index]; + } + for (index = emptyLinesBeforeNewSection; index < tableSize; ++index) { + if ((emptyLinesBeforeShortStringTable[index] > 2) && + (emptyLinesBeforeShortStringTable[index] > 0.7 * emptyLinesTable[index])) { + break; + } + } + emptyLinesBeforeNewSection = (index == tableSize) ? 
-1 : (int)index; + } + format.EmptyLinesBeforeNewSectionOption.setValue(emptyLinesBeforeNewSection); + format.CreateContentsTableOption.setValue(emptyLinesBeforeNewSection > 0); + } + + format.InitializedOption.setValue(true); +} + +BreakTypeOptionEntry::BreakTypeOptionEntry(PlainTextInfoPage &page, ZLIntegerOption &breakTypeOption) : myPage(page), myBreakTypeOption(breakTypeOption) { +} + +BreakTypeOptionEntry::~BreakTypeOptionEntry() { +} + +static std::vector<std::string> BREAK_TYPE_VALUES_VECTOR; + +int BreakTypeOptionEntry::initialIndex() const { + switch (myBreakTypeOption.value()) { + case PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE: + return 0; + case PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE: + return 1; + case PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE | PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT: + default: + return 2; + } +} + +const std::string &BreakTypeOptionEntry::initialValue() const { + return values()[initialIndex()]; +} + +const std::vector<std::string> &BreakTypeOptionEntry::values() const { + if (BREAK_TYPE_VALUES_VECTOR.empty()) { + BREAK_TYPE_VALUES_VECTOR.push_back("New Line"); + BREAK_TYPE_VALUES_VECTOR.push_back("Empty Line"); + BREAK_TYPE_VALUES_VECTOR.push_back("Line With Indent"); + } + return BREAK_TYPE_VALUES_VECTOR; +} + +void BreakTypeOptionEntry::onAccept(const std::string &value) { + if (value == values()[0]) { + myBreakTypeOption.setValue(PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE); + } else if (value == values()[1]) { + myBreakTypeOption.setValue(PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE); + } else if (value == values()[2]) { + myBreakTypeOption.setValue(PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE | PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT); + } +} + +void BreakTypeOptionEntry::onValueSelected(int index) { + myPage.myIgnoredIndentEntry->setVisible(index == 2); +} + +CreateContentsTableOptionEntry::CreateContentsTableOptionEntry(PlainTextInfoPage &page, ZLBooleanOption &option) : 
ZLSimpleBooleanOptionEntry(option), myPage(page) { +} + +CreateContentsTableOptionEntry::~CreateContentsTableOptionEntry() { +} + +void CreateContentsTableOptionEntry::onStateChanged(bool state) { + myPage.myEmptyLinesBeforeNewSectionEntry->setVisible(state); +} diff --git a/fbreader/src/formats/txt/PlainTextFormat.h b/fbreader/src/formats/txt/PlainTextFormat.h new file mode 100644 index 0000000..59cc61f --- /dev/null +++ b/fbreader/src/formats/txt/PlainTextFormat.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +#ifndef __PLAINTEXTFORMAT_H__ +#define __PLAINTEXTFORMAT_H__ + +#include <ZLInputStream.h> +#include <ZLOptions.h> +#include <optionEntries/ZLSimpleOptionEntry.h> +#include <ZLResource.h> + +#include "../FormatPlugin.h" + +class PlainTextFormat { + +public: + enum ParagraphBreakType { + BREAK_PARAGRAPH_AT_NEW_LINE = 1, + BREAK_PARAGRAPH_AT_EMPTY_LINE = 2, + BREAK_PARAGRAPH_AT_LINE_WITH_INDENT = 4, + }; + + PlainTextFormat(const ZLFile &file); + ~PlainTextFormat() {} + + bool initialized() const { return InitializedOption.value(); } + int breakType() const { return BreakTypeOption.value(); } + int ignoredIndent() const { return IgnoredIndentOption.value(); } + int emptyLinesBeforeNewSection() const { return EmptyLinesBeforeNewSectionOption.value(); } + bool createContentsTable() const { return CreateContentsTableOption.value(); } + +private: + ZLBooleanOption InitializedOption; + ZLIntegerOption BreakTypeOption; + ZLIntegerRangeOption IgnoredIndentOption; + ZLIntegerRangeOption EmptyLinesBeforeNewSectionOption; + ZLBooleanOption CreateContentsTableOption; + +friend class PlainTextInfoPage; +friend class PlainTextFormatDetector; +}; + +class PlainTextInfoPage : public FormatInfoPage { + +public: + PlainTextInfoPage(ZLOptionsDialog &dialog, const ZLFile &file, const ZLResourceKey &key, bool showContentsEntry); + ~PlainTextInfoPage(); + +private: + PlainTextFormat myFormat; + + ZLSimpleSpinOptionEntry *myIgnoredIndentEntry; + ZLSimpleSpinOptionEntry *myEmptyLinesBeforeNewSectionEntry; + +friend class BreakTypeOptionEntry; +friend class CreateContentsTableOptionEntry; +}; + +class PlainTextFormatDetector { + +public: + PlainTextFormatDetector() {} + ~PlainTextFormatDetector() {} + + void detect(ZLInputStream &stream, PlainTextFormat &format); +}; + +class BreakTypeOptionEntry : public ZLComboOptionEntry { + +public: + BreakTypeOptionEntry(PlainTextInfoPage &page, ZLIntegerOption &breakTypeOption); + ~BreakTypeOptionEntry(); + + int initialIndex() const; + const 
std::string &initialValue() const; + const std::vector<std::string> &values() const; + void onAccept(const std::string &value); + void onValueSelected(int index); + +private: + PlainTextInfoPage &myPage; + ZLIntegerOption &myBreakTypeOption; +}; + +class CreateContentsTableOptionEntry : public ZLSimpleBooleanOptionEntry { + +public: + CreateContentsTableOptionEntry(PlainTextInfoPage &page, ZLBooleanOption &option); + ~CreateContentsTableOptionEntry(); + void onStateChanged(bool state); + +private: + PlainTextInfoPage &myPage; +}; + +#endif /* __PLAINTEXTFORMAT_H__ */ diff --git a/fbreader/src/formats/txt/TxtBookReader.cpp b/fbreader/src/formats/txt/TxtBookReader.cpp new file mode 100644 index 0000000..c68ea2c --- /dev/null +++ b/fbreader/src/formats/txt/TxtBookReader.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +#include <cctype> + +#include "TxtBookReader.h" +#include "../../bookmodel/BookModel.h" + +TxtBookReader::TxtBookReader(BookModel &model, const PlainTextFormat &format, const std::string &encoding) : TxtReader(encoding), BookReader(model), myFormat(format) { +} + +void TxtBookReader::internalEndParagraph() { + if (!myLastLineIsEmpty) { + //myLineFeedCounter = 0; + myLineFeedCounter = -1; /* Fixed by Hatred: zero value was break LINE INDENT formater - + second line print with indent like new paragraf */ + } + myLastLineIsEmpty = true; + endParagraph(); +} + +bool TxtBookReader::characterDataHandler(std::string &str) { + const char *ptr = str.data(); + const char *end = ptr + str.length(); + for (; ptr != end; ++ptr) { + if (std::isspace((unsigned char)*ptr)) { + if (*ptr != '\t') { + ++mySpaceCounter; + } else { + mySpaceCounter += myFormat.ignoredIndent() + 1; // TODO: implement single option in PlainTextFormat + } + } else { + myLastLineIsEmpty = false; + break; + } + } + if (ptr != end) { + if ((myFormat.breakType() & PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT) && + myNewLine && (mySpaceCounter > myFormat.ignoredIndent())) { + internalEndParagraph(); + beginParagraph(); + } + addData(str); + if (myInsideContentsParagraph) { + addContentsData(str); + } + myNewLine = false; + } + return true; +} + +bool TxtBookReader::newLineHandler() { + if (!myLastLineIsEmpty) { + myLineFeedCounter = -1; + } + myLastLineIsEmpty = true; + ++myLineFeedCounter; + myNewLine = true; + mySpaceCounter = 0; + bool paragraphBreak = + (myFormat.breakType() & PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE) || + ((myFormat.breakType() & PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE) && (myLineFeedCounter > 0)); + + if (myFormat.createContentsTable()) { +// if (!myInsideContentsParagraph && (myLineFeedCounter == myFormat.emptyLinesBeforeNewSection() + 1)) { + /* Fixed by Hatred: remove '+ 1' for emptyLinesBeforeNewSection, it looks like very strange + when we should point 
count of empty string decrised by 1 in settings dialog */ + if (!myInsideContentsParagraph && (myLineFeedCounter == myFormat.emptyLinesBeforeNewSection())) { + myInsideContentsParagraph = true; + internalEndParagraph(); + insertEndOfSectionParagraph(); + beginContentsParagraph(); + enterTitle(); + pushKind(SECTION_TITLE); + beginParagraph(); + paragraphBreak = false; + } + if (myInsideContentsParagraph && (myLineFeedCounter == 1)) { + exitTitle(); + endContentsParagraph(); + popKind(); + myInsideContentsParagraph = false; + paragraphBreak = true; + } + } + + if (paragraphBreak) { + internalEndParagraph(); + beginParagraph(); + } + return true; +} + +void TxtBookReader::startDocumentHandler() { + setMainTextModel(); + pushKind(REGULAR); + beginParagraph(); + myLineFeedCounter = 0; + myInsideContentsParagraph = false; + enterTitle(); + myLastLineIsEmpty = true; + myNewLine = true; + mySpaceCounter = 0; +} + +void TxtBookReader::endDocumentHandler() { + internalEndParagraph(); +} diff --git a/fbreader/src/formats/txt/TxtBookReader.h b/fbreader/src/formats/txt/TxtBookReader.h new file mode 100644 index 0000000..e02ad2a --- /dev/null +++ b/fbreader/src/formats/txt/TxtBookReader.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __TXTBOOKREADER_H__ +#define __TXTBOOKREADER_H__ + +#include <stack> + +#include "TxtReader.h" +#include "PlainTextFormat.h" +#include "../../bookmodel/BookReader.h" + +class BookModel; + +class TxtBookReader : public TxtReader, public BookReader { + +public: + TxtBookReader(BookModel &model, const PlainTextFormat &format, const std::string &encoding); + ~TxtBookReader(); + +protected: + void startDocumentHandler(); + void endDocumentHandler(); + + bool characterDataHandler(std::string &str); + bool newLineHandler(); + +private: + void internalEndParagraph(); + +private: + const PlainTextFormat &myFormat; + + int myLineFeedCounter; + bool myInsideContentsParagraph; + bool myLastLineIsEmpty; + bool myNewLine; + int mySpaceCounter; +}; + +inline TxtBookReader::~TxtBookReader() {} + +#endif /* __TXTBOOKREADER_H__ */ diff --git a/fbreader/src/formats/txt/TxtPlugin.cpp b/fbreader/src/formats/txt/TxtPlugin.cpp new file mode 100644 index 0000000..b155c2f --- /dev/null +++ b/fbreader/src/formats/txt/TxtPlugin.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <ZLFile.h> +#include <ZLInputStream.h> + +#include "TxtPlugin.h" +#include "TxtBookReader.h" +#include "PlainTextFormat.h" + +#include "../../bookmodel/BookModel.h" +#include "../../library/Book.h" + +TxtPlugin::~TxtPlugin() { +} + +bool TxtPlugin::providesMetaInfo() const { + return false; +} + +bool TxtPlugin::acceptsFile(const ZLFile &file) const { + return file.extension() == "txt"; +} + +bool TxtPlugin::readMetaInfo(Book &book) const { + shared_ptr<ZLInputStream> stream = book.file().inputStream(); + if (stream.isNull()) { + return false; + } + detectEncodingAndLanguage(book, *stream); + if (book.encoding().empty()) { + return false; + } + + return true; +} + +bool TxtPlugin::readLanguageAndEncoding(Book &book) const { + (void)book; + return true; +} + +bool TxtPlugin::readModel(BookModel &model) const { + const Book &book = *model.book(); + const ZLFile &file = book.file(); + shared_ptr<ZLInputStream> stream = file.inputStream(); + if (stream.isNull()) { + return false; + } + + PlainTextFormat format(file); + if (!format.initialized()) { + PlainTextFormatDetector detector; + detector.detect(*stream, format); + } + + TxtBookReader(model, format, book.encoding()).readDocument(*stream); + return true; +} + +FormatInfoPage *TxtPlugin::createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file) { + return new PlainTextInfoPage(dialog, file, ZLResourceKey("Text"), true); +} diff --git a/fbreader/src/formats/txt/TxtPlugin.h b/fbreader/src/formats/txt/TxtPlugin.h new file mode 100644 index 0000000..e3e6e50 --- /dev/null +++ b/fbreader/src/formats/txt/TxtPlugin.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * 
it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */

#ifndef __TXTPLUGIN_H__
#define __TXTPLUGIN_H__

#include "../FormatPlugin.h"

/*
 * Format plugin for plain-text (".txt") books.
 */
class TxtPlugin : public FormatPlugin {

public:
	~TxtPlugin();
	// Returns false: the format itself provides no meta information.
	bool providesMetaInfo() const;
	// Accepts files whose extension is "txt".
	bool acceptsFile(const ZLFile &file) const;
	// Detects encoding and language; false if no stream or no encoding.
	bool readMetaInfo(Book &book) const;
	// No-op that returns true.
	bool readLanguageAndEncoding(Book &book) const;
	// Reads the book text into the model; false if the file has no stream.
	bool readModel(BookModel &model) const;
	// Creates the options page with the plain-text format settings.
	FormatInfoPage *createInfoPage(ZLOptionsDialog &dialog, const ZLFile &file);
};

#endif /* __TXTPLUGIN_H__ */
diff --git a/fbreader/src/formats/txt/TxtReader.cpp b/fbreader/src/formats/txt/TxtReader.cpp new file mode 100644 index 0000000..d2f5659 --- /dev/null +++ b/fbreader/src/formats/txt/TxtReader.cpp @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <cctype> + +#include <ZLInputStream.h> + +#include "TxtReader.h" + +class TxtReaderCore { + +public: + TxtReaderCore(TxtReader &reader); + virtual void readDocument(ZLInputStream &stream); + +protected: + TxtReader &myReader; +}; + +class TxtReaderCoreUtf16 : public TxtReaderCore { + +public: + TxtReaderCoreUtf16(TxtReader &reader); + void readDocument(ZLInputStream &stream); + +protected: + virtual char getAscii(const char *ptr) = 0; + virtual void setAscii(char *ptr, char ascii) = 0; +}; + +class TxtReaderCoreUtf16LE : public TxtReaderCoreUtf16 { + +public: + TxtReaderCoreUtf16LE(TxtReader &reader); + +protected: + char getAscii(const char *ptr); + void setAscii(char *ptr, char ascii); +}; + +class TxtReaderCoreUtf16BE : public TxtReaderCoreUtf16 { + +public: + TxtReaderCoreUtf16BE(TxtReader &reader); + +protected: + char getAscii(const char *ptr); + void setAscii(char *ptr, char ascii); +}; + +TxtReader::TxtReader(const std::string &encoding) : EncodedTextReader(encoding) { + if (ZLEncodingConverter::UTF16 == encoding) { + myCore = new TxtReaderCoreUtf16LE(*this); + } else if (ZLEncodingConverter::UTF16BE == encoding) { + myCore = new TxtReaderCoreUtf16BE(*this); + } else { + myCore = new TxtReaderCore(*this); + } +} + +TxtReader::~TxtReader() { +} + +void TxtReader::readDocument(ZLInputStream &stream) { + if (!stream.open()) { + return; + } + startDocumentHandler(); + myCore->readDocument(stream); + endDocumentHandler(); + stream.close(); +} + +TxtReaderCore::TxtReaderCore(TxtReader &reader) : myReader(reader) { +} + +TxtReaderCoreUtf16::TxtReaderCoreUtf16(TxtReader &reader) : TxtReaderCore(reader) { +} + +void TxtReaderCore::readDocument(ZLInputStream &stream) { + const std::size_t BUFSIZE = 2048; + char *buffer = new 
char[BUFSIZE]; + std::string str; + std::size_t length; + do { + length = stream.read(buffer, BUFSIZE); + char *start = buffer; + const char *end = buffer + length; + for (char *ptr = start; ptr != end; ++ptr) { + if (*ptr == '\n' || *ptr == '\r') { + bool skipNewLine = false; + if (*ptr == '\r' && (ptr + 1) != end && *(ptr + 1) == '\n') { + skipNewLine = true; + *ptr = '\n'; + } + if (start != ptr) { + str.erase(); + myReader.myConverter->convert(str, start, ptr + 1); + myReader.characterDataHandler(str); + } + if (skipNewLine) { + ++ptr; + } + start = ptr + 1; + myReader.newLineHandler(); + } else if (((*ptr) & 0x80) == 0 && std::isspace((unsigned char)*ptr)) { + if (*ptr != '\t') { + *ptr = ' '; + } + } else { + } + } + if (start != end) { + str.erase(); + myReader.myConverter->convert(str, start, end); + myReader.characterDataHandler(str); + } + } while (length == BUFSIZE); + delete[] buffer; +} + +void TxtReaderCoreUtf16::readDocument(ZLInputStream &stream) { + const std::size_t BUFSIZE = 2048; + char *buffer = new char[BUFSIZE]; + std::string str; + std::size_t length; + do { + length = stream.read(buffer, BUFSIZE); + char *start = buffer; + const char *end = buffer + length; + for (char *ptr = start; ptr < end; ptr += 2) { + const char chr = getAscii(ptr); + if (chr == '\n' || chr == '\r') { + bool skipNewLine = false; + if (chr == '\r' && ptr + 2 != end && getAscii(ptr + 2) == '\n') { + skipNewLine = true; + setAscii(ptr, '\n'); + } + if (start != ptr) { + str.erase(); + myReader.myConverter->convert(str, start, ptr + 2); + myReader.characterDataHandler(str); + } + if (skipNewLine) { + ptr += 2; + } + start = ptr + 2; + myReader.newLineHandler(); + } else if (chr != 0 && ((*ptr) & 0x80) == 0 && std::isspace(chr)) { + if (chr != '\t') { + setAscii(ptr, ' '); + } + } + } + if (start != end) { + str.erase(); + myReader.myConverter->convert(str, start, end); + myReader.characterDataHandler(str); + } + } while (length == BUFSIZE); + delete[] buffer; +} + 
+TxtReaderCoreUtf16LE::TxtReaderCoreUtf16LE(TxtReader &reader) : TxtReaderCoreUtf16(reader) { +} + +char TxtReaderCoreUtf16LE::getAscii(const char *ptr) { + return *(ptr + 1) == '\0' ? *ptr : '\0'; +} + +void TxtReaderCoreUtf16LE::setAscii(char *ptr, char ascii) { + *ptr = ascii; +} + +TxtReaderCoreUtf16BE::TxtReaderCoreUtf16BE(TxtReader &reader) : TxtReaderCoreUtf16(reader) { +} + +char TxtReaderCoreUtf16BE::getAscii(const char *ptr) { + return *ptr == '\0' ? *(ptr + 1) : '\0'; +} + +void TxtReaderCoreUtf16BE::setAscii(char *ptr, char ascii) { + *(ptr + 1) = ascii; +} diff --git a/fbreader/src/formats/txt/TxtReader.h b/fbreader/src/formats/txt/TxtReader.h new file mode 100644 index 0000000..518ba8e --- /dev/null +++ b/fbreader/src/formats/txt/TxtReader.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */

#ifndef __TXTREADER_H__
#define __TXTREADER_H__

#include <string>

#include <ZLEncodingConverter.h>

#include "../EncodedTextReader.h"

class ZLInputStream;
class TxtReaderCore;

/*
 * Base class for readers of plain-text documents. readDocument() splits the
 * stream into text pieces and line breaks and reports them through the pure
 * virtual handlers, which subclasses implement.
 */
class TxtReader : public EncodedTextReader {

public:
	// Opens the stream, reads it completely and closes it again.
	void readDocument(ZLInputStream &stream);

protected:
	TxtReader(const std::string &encoding);
	virtual ~TxtReader();

protected:
	// Called once before / after the contents of the document are reported.
	virtual void startDocumentHandler() = 0;
	virtual void endDocumentHandler() = 0;

	// Called with a piece of converted text / at every line break.
	virtual bool characterDataHandler(std::string &str) = 0;
	virtual bool newLineHandler() = 0;

private:
	// Encoding-specific line splitter, chosen in the constructor.
	shared_ptr<TxtReaderCore> myCore;

// The splitter classes call the handlers and use the converter directly.
friend class TxtReaderCore;
friend class TxtReaderCoreUtf16;
friend class TxtReaderCoreUtf16BE;
};

#endif /* __TXTREADER_H__ */
|