summaryrefslogtreecommitdiffstats
path: root/fbreader/src/formats/txt/PlainTextFormat.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'fbreader/src/formats/txt/PlainTextFormat.cpp')
-rw-r--r--fbreader/src/formats/txt/PlainTextFormat.cpp253
1 files changed, 253 insertions, 0 deletions
diff --git a/fbreader/src/formats/txt/PlainTextFormat.cpp b/fbreader/src/formats/txt/PlainTextFormat.cpp
new file mode 100644
index 0000000..7c9360f
--- /dev/null
+++ b/fbreader/src/formats/txt/PlainTextFormat.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cctype>
+#include <algorithm>
+
+#include <ZLOptions.h>
+#include <ZLOptionsDialog.h>
+#include <ZLOptionEntry.h>
+#include <ZLFile.h>
+
+#include "PlainTextFormat.h"
+
+#include "../../options/FBCategoryKey.h"
+
+const std::string OPTION_Initialized = "Initialized";
+const std::string OPTION_BreakType = "BreakType";
+const std::string OPTION_IgnoredIndent = "IgnoredIndent";
+const std::string OPTION_EmptyLinesBeforeNewSection = "EmptyLinesBeforeNewSection";
+const std::string OPTION_CreateContentsTable = "CreateContentsTable";
+
+PlainTextFormat::PlainTextFormat(const ZLFile &file) :
+ InitializedOption(FBCategoryKey::BOOKS, file.path(), OPTION_Initialized, false),
+ BreakTypeOption(FBCategoryKey::BOOKS, file.path(), OPTION_BreakType, 1),
+ IgnoredIndentOption(FBCategoryKey::BOOKS, file.path(), OPTION_IgnoredIndent, 1, 100, 1),
+ EmptyLinesBeforeNewSectionOption(FBCategoryKey::BOOKS, file.path(), OPTION_EmptyLinesBeforeNewSection, 1, 100, 1),
+ CreateContentsTableOption(FBCategoryKey::BOOKS, file.path(), OPTION_CreateContentsTable, false) {
+}
+
+PlainTextInfoPage::PlainTextInfoPage(ZLOptionsDialog &dialog, const ZLFile &file, const ZLResourceKey &key, bool showContentsEntry) : myFormat(file) {
+ if (!myFormat.initialized()) {
+ PlainTextFormatDetector detector;
+ shared_ptr<ZLInputStream> stream = file.inputStream();
+ if (!stream.isNull()) {
+ detector.detect(*stream, myFormat);
+ }
+ }
+
+ ZLDialogContent &tab = dialog.createTab(key);
+
+ BreakTypeOptionEntry *breakEntry = new BreakTypeOptionEntry(*this, myFormat.BreakTypeOption);
+ myIgnoredIndentEntry = new ZLSimpleSpinOptionEntry(myFormat.IgnoredIndentOption, 1);
+ tab.addOption(ZLResourceKey("breakType"), breakEntry);
+ tab.addOption(ZLResourceKey("ignoreIndent"), myIgnoredIndentEntry);
+ breakEntry->onValueSelected(breakEntry->initialIndex());
+
+ if (showContentsEntry) {
+ CreateContentsTableOptionEntry *contentsTableEntry = new CreateContentsTableOptionEntry(*this, myFormat.CreateContentsTableOption);
+ myEmptyLinesBeforeNewSectionEntry = new ZLSimpleSpinOptionEntry(myFormat.EmptyLinesBeforeNewSectionOption, 1);
+ tab.addOption(ZLResourceKey("buildTOC"), contentsTableEntry);
+ tab.addOption(ZLResourceKey("emptyLines"), myEmptyLinesBeforeNewSectionEntry);
+ contentsTableEntry->onStateChanged(contentsTableEntry->initialState());
+ }
+}
+
+PlainTextInfoPage::~PlainTextInfoPage() {
+}
+
+const int BUFFER_SIZE = 4096;
+
+void PlainTextFormatDetector::detect(ZLInputStream &stream, PlainTextFormat &format) {
+ if (!stream.open()) {
+ return;
+ }
+
+ const unsigned int tableSize = 10;
+
+ unsigned int lineCounter = 0;
+ int emptyLineCounter = -1;
+ unsigned int stringsWithLengthLessThan81Counter = 0;
+ unsigned int stringIndentTable[tableSize] = { 0 };
+ unsigned int emptyLinesTable[tableSize] = { 0 };
+ unsigned int emptyLinesBeforeShortStringTable[tableSize] = { 0 };
+
+ bool currentLineIsEmpty = true;
+ unsigned int currentLineLength = 0;
+ unsigned int currentLineIndent = 0;
+ int currentNumberOfEmptyLines = -1;
+
+ char *buffer = new char[BUFFER_SIZE];
+ int length;
+ char previous = 0;
+ do {
+ length = stream.read(buffer, BUFFER_SIZE);
+ const char *end = buffer + length;
+ for (const char *ptr = buffer; ptr != end; ++ptr) {
+ ++currentLineLength;
+ if (*ptr == '\n') {
+ ++lineCounter;
+ if (currentLineIsEmpty) {
+ ++emptyLineCounter;
+ ++currentNumberOfEmptyLines;
+ } else {
+ if (currentNumberOfEmptyLines >= 0) {
+ int index = std::min(currentNumberOfEmptyLines, (int)tableSize - 1);
+ emptyLinesTable[index]++;
+ if (currentLineLength < 51) {
+ emptyLinesBeforeShortStringTable[index]++;
+ }
+ }
+ currentNumberOfEmptyLines = -1;
+ }
+ if (currentLineLength < 81) {
+ ++stringsWithLengthLessThan81Counter;
+ }
+ if (!currentLineIsEmpty) {
+ stringIndentTable[std::min(currentLineIndent, tableSize - 1)]++;
+ }
+
+ currentLineIsEmpty = true;
+ currentLineLength = 0;
+ currentLineIndent = 0;
+ } else if (*ptr == '\r') {
+ continue;
+ } else if (std::isspace((unsigned char)*ptr)) {
+ if (currentLineIsEmpty) {
+ ++currentLineIndent;
+ }
+ } else {
+ currentLineIsEmpty = false;
+ }
+ previous = *ptr;
+ }
+ } while (length == BUFFER_SIZE);
+ delete[] buffer;
+
+ unsigned int nonEmptyLineCounter = lineCounter - emptyLineCounter;
+
+ {
+ unsigned int indent = 0;
+ unsigned int lineWithIndent = 0;
+ for (; indent < tableSize; ++indent) {
+ lineWithIndent += stringIndentTable[indent];
+ if (lineWithIndent > 0.1 * nonEmptyLineCounter) {
+ break;
+ }
+ }
+ format.IgnoredIndentOption.setValue(indent + 1);
+ }
+
+ {
+ int breakType = 0;
+ breakType |= PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE;
+ if (stringsWithLengthLessThan81Counter < 0.3 * nonEmptyLineCounter) {
+ breakType |= PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE;
+ } else {
+ breakType |= PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT;
+ }
+ format.BreakTypeOption.setValue(breakType);
+ }
+
+ {
+ unsigned int max = 0;
+ unsigned index;
+ int emptyLinesBeforeNewSection = -1;
+ for (index = 2; index < tableSize; ++index) {
+ if (max < emptyLinesBeforeShortStringTable[index]) {
+ max = emptyLinesBeforeShortStringTable[index];
+ emptyLinesBeforeNewSection = index;
+ }
+ }
+ if (emptyLinesBeforeNewSection > 0) {
+ for (index = tableSize - 1; index > 0; --index) {
+ emptyLinesTable[index - 1] += emptyLinesTable[index];
+ emptyLinesBeforeShortStringTable[index - 1] += emptyLinesBeforeShortStringTable[index];
+ }
+ for (index = emptyLinesBeforeNewSection; index < tableSize; ++index) {
+ if ((emptyLinesBeforeShortStringTable[index] > 2) &&
+ (emptyLinesBeforeShortStringTable[index] > 0.7 * emptyLinesTable[index])) {
+ break;
+ }
+ }
+ emptyLinesBeforeNewSection = (index == tableSize) ? -1 : (int)index;
+ }
+ format.EmptyLinesBeforeNewSectionOption.setValue(emptyLinesBeforeNewSection);
+ format.CreateContentsTableOption.setValue(emptyLinesBeforeNewSection > 0);
+ }
+
+ format.InitializedOption.setValue(true);
+}
+
+BreakTypeOptionEntry::BreakTypeOptionEntry(PlainTextInfoPage &page, ZLIntegerOption &breakTypeOption) : myPage(page), myBreakTypeOption(breakTypeOption) {
+}
+
+BreakTypeOptionEntry::~BreakTypeOptionEntry() {
+}
+
+static std::vector<std::string> BREAK_TYPE_VALUES_VECTOR;
+
+int BreakTypeOptionEntry::initialIndex() const {
+ switch (myBreakTypeOption.value()) {
+ case PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE:
+ return 0;
+ case PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE:
+ return 1;
+ case PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE | PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT:
+ default:
+ return 2;
+ }
+}
+
+const std::string &BreakTypeOptionEntry::initialValue() const {
+ return values()[initialIndex()];
+}
+
+const std::vector<std::string> &BreakTypeOptionEntry::values() const {
+ if (BREAK_TYPE_VALUES_VECTOR.empty()) {
+ BREAK_TYPE_VALUES_VECTOR.push_back("New Line");
+ BREAK_TYPE_VALUES_VECTOR.push_back("Empty Line");
+ BREAK_TYPE_VALUES_VECTOR.push_back("Line With Indent");
+ }
+ return BREAK_TYPE_VALUES_VECTOR;
+}
+
+void BreakTypeOptionEntry::onAccept(const std::string &value) {
+ if (value == values()[0]) {
+ myBreakTypeOption.setValue(PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE);
+ } else if (value == values()[1]) {
+ myBreakTypeOption.setValue(PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE);
+ } else if (value == values()[2]) {
+ myBreakTypeOption.setValue(PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE | PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT);
+ }
+}
+
+void BreakTypeOptionEntry::onValueSelected(int index) {
+ myPage.myIgnoredIndentEntry->setVisible(index == 2);
+}
+
+CreateContentsTableOptionEntry::CreateContentsTableOptionEntry(PlainTextInfoPage &page, ZLBooleanOption &option) : ZLSimpleBooleanOptionEntry(option), myPage(page) {
+}
+
+CreateContentsTableOptionEntry::~CreateContentsTableOptionEntry() {
+}
+
+void CreateContentsTableOptionEntry::onStateChanged(bool state) {
+ myPage.myEmptyLinesBeforeNewSectionEntry->setVisible(state);
+}