summaryrefslogtreecommitdiffstats
path: root/fbreader/src/formats/doc
diff options
context:
space:
mode:
authorMichele Calgaro <michele.calgaro@yahoo.it>2024-05-11 21:28:48 +0900
committerMichele Calgaro <michele.calgaro@yahoo.it>2024-05-11 21:28:48 +0900
commit2462d03f322261bd616721c2b2065c4004b36c9c (patch)
tree239947a0737bb8386703a1497f12c09aebd3080a /fbreader/src/formats/doc
downloadtde-ebook-reader-2462d03f322261bd616721c2b2065c4004b36c9c.tar.gz
tde-ebook-reader-2462d03f322261bd616721c2b2065c4004b36c9c.zip
Initial import (as is) from Debian Snapshot's 'fbreader' source code (https://snapshot.debian.org/package/fbreader/0.99.4%2Bdfsg-6).
The Debian code is provided under GPL2 license. Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
Diffstat (limited to 'fbreader/src/formats/doc')
-rw-r--r--fbreader/src/formats/doc/DocBookReader.cpp377
-rw-r--r--fbreader/src/formats/doc/DocBookReader.h103
-rw-r--r--fbreader/src/formats/doc/DocFloatImageReader.cpp384
-rw-r--r--fbreader/src/formats/doc/DocFloatImageReader.h107
-rw-r--r--fbreader/src/formats/doc/DocInlineImageReader.cpp148
-rw-r--r--fbreader/src/formats/doc/DocInlineImageReader.h37
-rw-r--r--fbreader/src/formats/doc/DocMetaInfoReader.cpp38
-rw-r--r--fbreader/src/formats/doc/DocMetaInfoReader.h46
-rw-r--r--fbreader/src/formats/doc/DocPlugin.cpp71
-rw-r--r--fbreader/src/formats/doc/DocPlugin.h39
-rw-r--r--fbreader/src/formats/doc/DocStreams.cpp202
-rw-r--r--fbreader/src/formats/doc/DocStreams.h73
-rw-r--r--fbreader/src/formats/doc/OleMainStream.cpp1085
-rw-r--r--fbreader/src/formats/doc/OleMainStream.h223
-rw-r--r--fbreader/src/formats/doc/OleStorage.cpp304
-rw-r--r--fbreader/src/formats/doc/OleStorage.h92
-rw-r--r--fbreader/src/formats/doc/OleStream.cpp221
-rw-r--r--fbreader/src/formats/doc/OleStream.h58
-rw-r--r--fbreader/src/formats/doc/OleStreamParser.cpp210
-rw-r--r--fbreader/src/formats/doc/OleStreamParser.h101
-rw-r--r--fbreader/src/formats/doc/OleStreamReader.cpp86
-rw-r--r--fbreader/src/formats/doc/OleStreamReader.h46
-rw-r--r--fbreader/src/formats/doc/OleUtil.cpp58
-rw-r--r--fbreader/src/formats/doc/OleUtil.h32
24 files changed, 4141 insertions, 0 deletions
diff --git a/fbreader/src/formats/doc/DocBookReader.cpp b/fbreader/src/formats/doc/DocBookReader.cpp
new file mode 100644
index 0000000..99f471a
--- /dev/null
+++ b/fbreader/src/formats/doc/DocBookReader.cpp
@@ -0,0 +1,377 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <vector>
+#include <string>
+
+#include <ZLInputStream.h>
+#include <ZLLogger.h>
+#include <ZLFile.h>
+#include <ZLStringUtil.h>
+#include <ZLFileImage.h>
+
+#include "DocBookReader.h"
+#include "../../bookmodel/BookModel.h"
+#include "../../library/Book.h"
+
+#include "OleStorage.h"
+#include "OleMainStream.h"
+
+// Creates a reader that fills `model`; `encoding` names the converter used
+// later (lazily, in ansiDataHandler) for 8-bit text runs.
+// All three state-machine members are given a defined start value: the reader
+// starts in plain-text mode, outside any field and outside any hyperlink.
+DocBookReader::DocBookReader(BookModel &model, const std::string &encoding) :
+	myModelReader(model),
+	myReadState(READ_TEXT),
+	myReadFieldState(READ_FIELD_TEXT),
+	myHyperlinkTypeState(NO_HYPERLINK),
+	myPictureCounter(0),
+	myEncoding(encoding) {
+}
+
+// Opens the book file, feeds it through readDocument() and closes the text
+// model with an end-of-text paragraph.
+// Returns false if the stream cannot be opened or the document cannot be read.
+bool DocBookReader::readBook() {
+	shared_ptr<ZLInputStream> input = myModelReader.model().book()->file().inputStream();
+	const bool opened = !input.isNull() && input->open();
+	if (!opened) {
+		return false;
+	}
+
+	//prepare the main text model with one open regular paragraph
+	myModelReader.setMainTextModel();
+	myModelReader.pushKind(REGULAR);
+	myModelReader.beginParagraph();
+
+	const bool parsed = readDocument(input, true);
+	if (parsed) {
+		myModelReader.insertEndOfTextParagraph();
+	}
+	return parsed;
+}
+
+// Consumes one text character. Inside a field the character may be collected
+// as field instruction, dropped, or (for a tab in the field text) switch the
+// field into "drop everything" mode; otherwise it is appended to the current
+// paragraph as UTF-8.
+void DocBookReader::handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
+	if (myReadState == READ_FIELD) {
+		switch (myReadFieldState) {
+			case READ_FIELD_INFO:
+				myFieldInfoBuffer.push_back(ucs2char);
+				return;
+			case DONT_READ_FIELD_TEXT:
+				return;
+			case READ_FIELD_TEXT:
+				if (ucs2char == WORD_HORIZONTAL_TAB) {
+					//to remove pagination from TOC (from doc saved in OpenOffice)
+					myReadFieldState = DONT_READ_FIELD_TEXT;
+					return;
+				}
+				break;
+		}
+	}
+
+	//convert the single UCS-2 character into its UTF-8 representation
+	ZLUnicodeUtil::Ucs2String single;
+	single.push_back(ucs2char);
+	std::string encoded;
+	ZLUnicodeUtil::ucs2ToUtf8(encoded, single);
+
+	if (!myModelReader.paragraphIsOpen()) {
+		myModelReader.beginParagraph();
+	}
+	myModelReader.addData(encoded);
+}
+
+// Starts a new paragraph for a hard line break and re-applies the current
+// paragraph style entry and the currently open font kinds to it.
+void DocBookReader::handleHardLinebreak() {
+	if (myModelReader.paragraphIsOpen()) {
+		myModelReader.endParagraph();
+	}
+	myModelReader.beginParagraph();
+
+	if (!myCurrentStyleEntry.isNull()) {
+		myModelReader.addStyleEntry(*myCurrentStyleEntry);
+	}
+
+	//reopen every kind that was active before the break
+	for (std::vector<FBTextKind>::const_iterator it = myKindStack.begin(); it != myKindStack.end(); ++it) {
+		myModelReader.addControl(*it, true);
+	}
+}
+
+// Closes the open paragraph (if any), opens a fresh one and forgets the
+// current paragraph style entry, so it is not re-applied by a later hard
+// line break.
+void DocBookReader::handleParagraphEnd() {
+ if (myModelReader.paragraphIsOpen()) {
+ myModelReader.endParagraph();
+ }
+ myModelReader.beginParagraph();
+ myCurrentStyleEntry = 0;
+}
+
+// Ends the open paragraph (if any), drops the current paragraph style entry,
+// inserts an end-of-section paragraph and then opens a new paragraph.
+void DocBookReader::handlePageBreak() {
+ if (myModelReader.paragraphIsOpen()) {
+ myModelReader.endParagraph();
+ }
+ myCurrentStyleEntry = 0;
+ myModelReader.insertEndOfSectionParagraph();
+ myModelReader.beginParagraph();
+}
+
+// Represents a table cell boundary in the text flow by emitting the
+// characters SPACE, VERTICAL_LINE, SPACE through handleChar().
+void DocBookReader::handleTableSeparator() {
+ handleChar(SPACE);
+ handleChar(VERTICAL_LINE);
+ handleChar(SPACE);
+}
+
+// A table row end is treated exactly like a paragraph end.
+void DocBookReader::handleTableEndRow() {
+ handleParagraphEnd();
+}
+
+// Footnote marks are currently ignored: the handler is an intentional no-op.
+void DocBookReader::handleFootNoteMark() {
+ //TODO implement
+}
+
+// Enters field mode. Fields are not tracked as a stack: if a field is already
+// being read, it is closed first via handleEndField(). Afterwards following
+// characters are collected as field instruction (READ_FIELD_INFO) and no
+// hyperlink is considered open.
+void DocBookReader::handleStartField() {
+ if (myReadState == READ_FIELD) { //for nested fields
+ handleEndField();
+ }
+ myReadState = READ_FIELD;
+ myReadFieldState = READ_FIELD_INFO;
+ myHyperlinkTypeState = NO_HYPERLINK;
+}
+
+// Called between the instruction part and the visible text of a field.
+// Interprets the collected instruction:
+//  - empty instruction or "SEQ ..."  -> the field text is shown as plain text;
+//  - "HYPERLINK \l ..."              -> an internal hyperlink is opened;
+//  - "HYPERLINK ..."                 -> an external hyperlink is opened;
+//  - anything else                   -> the field text is suppressed.
+void DocBookReader::handleSeparatorField() {
+	static const std::string HYPERLINK = "HYPERLINK";
+	static const std::string SEQUENCE = "SEQ";
+	static const std::string SPACE_DELIMETER = " ";
+	static const std::string LOCAL_LINK = "\\l";
+
+	//default outcome: show the field text, no hyperlink opened
+	myReadFieldState = READ_FIELD_TEXT;
+	myHyperlinkTypeState = NO_HYPERLINK;
+
+	//take the instruction out of the shared buffer
+	ZLUnicodeUtil::Ucs2String instruction = myFieldInfoBuffer;
+	myFieldInfoBuffer.clear();
+
+	std::string text;
+	ZLUnicodeUtil::ucs2ToUtf8(text, instruction);
+	ZLUnicodeUtil::utf8Trim(text);
+	if (text.empty()) {
+		return;
+	}
+
+	//TODO split function can returns empty string, maybe fix it
+	const std::vector<std::string> rawTokens = ZLStringUtil::split(text, SPACE_DELIMETER);
+	std::vector<std::string> tokens;
+	for (std::vector<std::string>::const_iterator it = rawTokens.begin(); it != rawTokens.end(); ++it) {
+		if (!it->empty()) {
+			tokens.push_back(*it);
+		}
+	}
+
+	if (!tokens.empty() && tokens.front() == SEQUENCE) {
+		//sequence fields keep the defaults chosen above
+		return;
+	}
+
+	if (tokens.size() < 2 || tokens.front() != HYPERLINK) {
+		//to remove pagination from TOC and not hyperlink fields
+		myReadFieldState = DONT_READ_FIELD_TEXT;
+		return;
+	}
+
+	//only external targets are url-encoded
+	const bool isLocal = tokens.at(1) == LOCAL_LINK;
+	const std::string link = parseLink(instruction, !isLocal);
+	if (link.empty()) {
+		return;
+	}
+	if (isLocal) {
+		myModelReader.addHyperlinkControl(INTERNAL_HYPERLINK, link);
+		myHyperlinkTypeState = INT_HYPERLINK_INSERTED;
+	} else {
+		myModelReader.addHyperlinkControl(EXTERNAL_HYPERLINK, link);
+		myHyperlinkTypeState = EXT_HYPERLINK_INSERTED;
+	}
+}
+
+// Leaves field mode: discards any collected instruction, closes the hyperlink
+// control opened by handleSeparatorField() (if one was opened) and returns to
+// plain-text mode. Does nothing more than clearing the buffer when no field
+// is being read.
+void DocBookReader::handleEndField() {
+	myFieldInfoBuffer.clear();
+	if (myReadState == READ_TEXT) {
+		return;
+	}
+
+	switch (myHyperlinkTypeState) {
+		case EXT_HYPERLINK_INSERTED:
+			myModelReader.addControl(EXTERNAL_HYPERLINK, false);
+			break;
+		case INT_HYPERLINK_INSERTED:
+			myModelReader.addControl(INTERNAL_HYPERLINK, false);
+			break;
+		default: //no hyperlink was opened, nothing to close
+			break;
+	}
+
+	myReadState = READ_TEXT;
+	myHyperlinkTypeState = NO_HYPERLINK;
+}
+
+// Registers an image whose bytes are located by `blocks` inside the book
+// file. The image id is the running picture counter rendered as a string;
+// the counter is advanced for the next image.
+void DocBookReader::handleImage(const ZLFileImage::Blocks &blocks) {
+	const unsigned int pictureIndex = myPictureCounter++;
+	std::string imageId;
+	ZLStringUtil::appendNumber(imageId, pictureIndex);
+
+	//the reference is placed into the text before the image itself is stored
+	myModelReader.addImageReference(imageId);
+
+	ZLFile imageFile(myModelReader.model().book()->file().path(), ZLMimeType::IMAGE_AUTO);
+	myModelReader.addImage(imageId, new ZLFileImage(imageFile, blocks, ZLFileImage::ENCODING_NONE));
+}
+
+// Handles control characters that have no dedicated handler:
+// WORD_MINUS is emitted as MINUS, a horizontal tab is emitted unchanged,
+// a soft hyphen and every other control character are dropped.
+void DocBookReader::handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) {
+	if (ucs2char == WORD_MINUS) {
+		handleChar(MINUS);
+		return;
+	}
+	if (ucs2char == WORD_SOFT_HYPHEN) {
+		//skip
+		return;
+	}
+	if (ucs2char == WORD_HORIZONTAL_TAB) {
+		handleChar(ucs2char);
+	}
+	//anything else is ignored
+}
+
+// Applies a new character font style: closes every currently open kind,
+// rebuilds the kind stack from the FONT_BOLD / FONT_ITALIC bits of
+// `fontStyle` and opens the new kinds. Ignored while the text of a
+// hyperlink field is being read.
+void DocBookReader::handleFontStyle(unsigned int fontStyle) {
+	const bool insideHyperlinkText =
+		myReadState == READ_FIELD &&
+		myReadFieldState == READ_FIELD_TEXT &&
+		myHyperlinkTypeState != NO_HYPERLINK;
+	if (insideHyperlinkText) {
+		//to fix bug with hyperlink, that's only bold and doesn't looks like hyperlink
+		return;
+	}
+
+	//close kinds in reverse order of opening
+	for (; !myKindStack.empty(); myKindStack.pop_back()) {
+		myModelReader.addControl(myKindStack.back(), false);
+	}
+
+	if (fontStyle & OleMainStream::CharInfo::FONT_BOLD) {
+		myKindStack.push_back(BOLD);
+	}
+	if (fontStyle & OleMainStream::CharInfo::FONT_ITALIC) {
+		myKindStack.push_back(ITALIC);
+	}
+
+	for (std::vector<FBTextKind>::const_iterator it = myKindStack.begin(); it != myKindStack.end(); ++it) {
+		myModelReader.addControl(*it, true);
+	}
+}
+
+// Applies a paragraph style: optional page break, alignment, a relative font
+// size for heading styles H1-H3, and the font kinds that belong to the style.
+void DocBookReader::handleParagraphStyle(const OleMainStream::Style &styleInfo) {
+	if (styleInfo.HasPageBreakBefore) {
+		handlePageBreak();
+	}
+
+	shared_ptr<ZLTextStyleEntry> entry = new ZLTextStyleEntry(ZLTextStyleEntry::STYLE_OTHER_ENTRY);
+
+	switch (styleInfo.Alignment) {
+		case OleMainStream::Style::ALIGNMENT_LEFT:
+			entry->setAlignmentType(ALIGN_LEFT);
+			break;
+		case OleMainStream::Style::ALIGNMENT_RIGHT:
+			entry->setAlignmentType(ALIGN_RIGHT);
+			break;
+		case OleMainStream::Style::ALIGNMENT_CENTER:
+			entry->setAlignmentType(ALIGN_CENTER);
+			break;
+		case OleMainStream::Style::ALIGNMENT_JUSTIFY:
+			entry->setAlignmentType(ALIGN_JUSTIFY);
+			break;
+		default: // in that case, use default alignment type
+			break;
+	}
+
+	//TODO in case, where style is heading, but size is small it works wrong
+	//font size in percent for heading styles; 0 means "leave the size alone"
+	short headingPercent = 0;
+	switch (styleInfo.StyleIdCurrent) {
+		case OleMainStream::Style::STYLE_H1:
+			headingPercent = 140;
+			break;
+		case OleMainStream::Style::STYLE_H2:
+			headingPercent = 120;
+			break;
+		case OleMainStream::Style::STYLE_H3:
+			headingPercent = 110;
+			break;
+		default:
+			break;
+	}
+	if (headingPercent != 0) {
+		entry->setLength(ZLTextStyleEntry::LENGTH_FONT_SIZE, headingPercent, ZLTextStyleEntry::SIZE_UNIT_PERCENT);
+	}
+
+	myCurrentStyleEntry = entry;
+	myModelReader.addStyleEntry(*myCurrentStyleEntry);
+
+	// we should have the same font style, as for the previous paragraph,
+	// if it has the same StyleIdCurrent
+	const bool sameStyleAsBefore =
+		myCurrentStyleInfo.StyleIdCurrent != OleMainStream::Style::STYLE_INVALID &&
+		myCurrentStyleInfo.StyleIdCurrent == styleInfo.StyleIdCurrent;
+	if (sameStyleAsBefore) {
+		for (std::vector<FBTextKind>::const_iterator it = myKindStack.begin(); it != myKindStack.end(); ++it) {
+			myModelReader.addControl(*it, true);
+		}
+	} else {
+		myKindStack.clear();
+		// fill by the fontstyle, that was got from Stylesheet
+		handleFontStyle(styleInfo.CurrentCharInfo.FontStyle);
+	}
+	myCurrentStyleInfo = styleInfo;
+}
+
+// Turns a bookmark into a hyperlink label named `name` in the text model.
+void DocBookReader::handleBookmark(const std::string &name) {
+ myModelReader.addHyperlinkLabel(name);
+}
+
+// Extracts the link target from a HYPERLINK field instruction: everything
+// between the first and the last double quote of `s`, converted to UTF-8.
+// With `urlencode` set, whitespace becomes "%20" and inner quotes "%22".
+// Returns an empty string when `s` holds fewer than two quotes.
+std::string DocBookReader::parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode) {
+	//TODO add support for HYPERLINK like that:
+	// [0x13] HYPERLINK "http://site.ru/some text" \t "_blank" [0x14] text [0x15]
+	//Current implementation search for last QUOTE, so, it reads \t and _blank as part of link
+	//Last quote searching is need to handle link like that:
+	// [0x13] HYPERLINK "http://yandex.ru/yandsearch?text='some text' и "some text2"" [0x14] link text [0x15]
+
+	static const ZLUnicodeUtil::Ucs2Char QUOTE = 0x22;
+	const std::size_t length = s.size();
+
+	//TODO maybe functions findFirstOf and findLastOf should be in ZLUnicodeUtil class
+	std::size_t opening = 0;
+	while (opening < length && s.at(opening) != QUOTE) {
+		++opening;
+	}
+	if (opening == length) {
+		//no quote at all
+		return std::string();
+	}
+
+	//a quote exists at `opening`, so this backward scan stops there at the latest
+	std::size_t closing = length - 1;
+	while (s.at(closing) != QUOTE) {
+		--closing;
+	}
+	if (closing == opening) {
+		//only a single quote was found
+		return std::string();
+	}
+
+	ZLUnicodeUtil::Ucs2String link;
+	for (std::size_t pos = opening + 1; pos < closing; ++pos) {
+		const ZLUnicodeUtil::Ucs2Char ch = s.at(pos);
+		if (!urlencode) {
+			link.push_back(ch);
+			continue;
+		}
+		//TODO maybe implement function for encoding all signs in url, not only spaces and quotes
+		//TODO maybe add backslash support
+		if (ZLUnicodeUtil::isSpace(ch)) {
+			link.push_back('%');
+			link.push_back('2');
+			link.push_back('0');
+		} else if (ch == QUOTE) {
+			link.push_back('%');
+			link.push_back('2');
+			link.push_back('2');
+		} else {
+			link.push_back(ch);
+		}
+	}
+
+	std::string result;
+	ZLUnicodeUtil::ucs2ToUtf8(result, link);
+	return result;
+}
+
+// The start of the footnotes is separated from the preceding text by a
+// page break.
+void DocBookReader::footnotesStartHandler() {
+ handlePageBreak();
+}
+
+// Receives a run of 8-bit encoded text, converts it to UTF-8 with the
+// converter for myEncoding (created on first use; the collection's default
+// converter is used when the encoding is unknown) and passes the result to
+// ZLUnicodeUtil::utf8ToUcs2 with myBuffer as destination.
+void DocBookReader::ansiDataHandler(const char *buffer, std::size_t len) {
+	if (myConverter.isNull()) {
+		// lazy converter initialization
+		ZLEncodingCollection &encodings = ZLEncodingCollection::Instance();
+		ZLEncodingConverterInfoPtr converterInfo = encodings.info(myEncoding);
+		if (converterInfo.isNull()) {
+			myConverter = encodings.defaultConverter();
+		} else {
+			myConverter = converterInfo->createConverter();
+		}
+	}
+
+	std::string decoded;
+	myConverter->convert(decoded, buffer, buffer + len);
+	ZLUnicodeUtil::utf8ToUcs2(myBuffer, decoded);
+}
+
+// Receives one UCS-2 character and appends it to myBuffer unchanged.
+void DocBookReader::ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) {
+ myBuffer.push_back(symbol);
+}
diff --git a/fbreader/src/formats/doc/DocBookReader.h b/fbreader/src/formats/doc/DocBookReader.h
new file mode 100644
index 0000000..d80fb8e
--- /dev/null
+++ b/fbreader/src/formats/doc/DocBookReader.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCBOOKREADER_H__
+#define __DOCBOOKREADER_H__
+
+#include <vector>
+
+#include <shared_ptr.h>
+#include <ZLFile.h>
+#include <ZLTextStyleEntry.h>
+#include <ZLEncodingConverter.h>
+
+#include "../../bookmodel/BookReader.h"
+
+#include "OleMainStream.h"
+#include "OleStreamParser.h"
+
+// Builds the text model of a book from a Word document. The class derives
+// from OleStreamParser and implements the handle*/…Handler callbacks declared
+// below; each callback forwards text, paragraph structure, styles, hyperlinks
+// and images to the BookReader held in myModelReader.
+class DocBookReader : public OleStreamParser {
+
+public:
+ // `model` is the book model to fill; `encoding` names the converter used
+ // for 8-bit text passed to ansiDataHandler().
+ DocBookReader(BookModel &model, const std::string &encoding);
+ ~DocBookReader();
+ // Reads the whole book file; returns false if it cannot be opened or read.
+ bool readBook();
+
+private:
+ // raw text input
+ void ansiDataHandler(const char *buffer, std::size_t len);
+ void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
+ void footnotesStartHandler();
+
+ // text and structure events
+ void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char);
+ void handleHardLinebreak();
+ void handleParagraphEnd();
+ void handlePageBreak();
+ void handleTableSeparator();
+ void handleTableEndRow();
+ void handleFootNoteMark();
+ // field events: start, instruction/text separator, end
+ void handleStartField();
+ void handleSeparatorField();
+ void handleEndField();
+ void handleImage(const ZLFileImage::Blocks &blocks);
+ void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char);
+
+ //formatting:
+ void handleFontStyle(unsigned int fontStyle);
+ void handleParagraphStyle(const OleMainStream::Style &styleInfo);
+ void handleBookmark(const std::string &name);
+
+private:
+ // Returns the text between the first and the last double quote of `s` as
+ // UTF-8; with `urlencode` spaces and inner quotes are percent-encoded.
+ static std::string parseLink(ZLUnicodeUtil::Ucs2String s, bool urlencode = false);
+
+private:
+ BookReader myModelReader;
+
+ // field instruction characters collected while in READ_FIELD_INFO state
+ ZLUnicodeUtil::Ucs2String myFieldInfoBuffer;
+
+ // whether a field is currently being read or plain text
+ enum {
+ READ_FIELD,
+ READ_TEXT
+ } myReadState;
+
+ // only consulted while myReadState == READ_FIELD: show the field text,
+ // suppress it, or collect the field instruction
+ enum {
+ READ_FIELD_TEXT,
+ DONT_READ_FIELD_TEXT,
+ READ_FIELD_INFO
+ } myReadFieldState;
+
+ //maybe it should be flag?
+ // which hyperlink control (if any) is open and must be closed at field end
+ enum {
+ NO_HYPERLINK,
+ EXT_HYPERLINK_INSERTED,
+ INT_HYPERLINK_INSERTED
+ } myHyperlinkTypeState;
+
+ //formatting
+ // currently open font kinds (BOLD / ITALIC), in opening order
+ std::vector<FBTextKind> myKindStack;
+ // style entry of the current paragraph; null after a paragraph end or page break
+ shared_ptr<ZLTextStyleEntry> myCurrentStyleEntry;
+ // last style passed to handleParagraphStyle()
+ OleMainStream::Style myCurrentStyleInfo;
+ // number of images registered so far; also used as the next image id
+ unsigned int myPictureCounter;
+
+ const std::string myEncoding;
+ // created lazily in ansiDataHandler()
+ shared_ptr<ZLEncodingConverter> myConverter;
+};
+
+// Nothing to release explicitly: the destructor body is empty.
+inline DocBookReader::~DocBookReader() {}
+
+#endif /* __DOCBOOKREADER_H__ */
diff --git a/fbreader/src/formats/doc/DocFloatImageReader.cpp b/fbreader/src/formats/doc/DocFloatImageReader.cpp
new file mode 100644
index 0000000..8c308e4
--- /dev/null
+++ b/fbreader/src/formats/doc/DocFloatImageReader.cpp
@@ -0,0 +1,384 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLLogger.h>
+
+#include "OleUtil.h"
+#include "OleStream.h"
+#include "OleMainStream.h"
+
+#include "DocFloatImageReader.h"
+
+// Stores the two streams and the location (`off`, `len`) inside the table
+// stream that readAll() will parse; no reading happens here.
+DocFloatImageReader::DocFloatImageReader(unsigned int off, unsigned int len, shared_ptr<OleStream> tableStream, shared_ptr<OleStream> mainStream) :
+ myTableStream(tableStream),
+ myMainStream(mainStream),
+ myOffset(off),
+ myLength(len) {
+}
+
+// Parses the OfficeArtContent area of the table stream (myOffset, myLength)
+// and fills myItem. Parsing stops at the first top-level record that is
+// neither a Dgg container (0xF000) nor a Dg container (0xF002).
+void DocFloatImageReader::readAll() {
+	//OfficeArtContent structure is described at p.405-406 [MS-DOC]
+	if (!myTableStream->seek(myOffset, true)) {
+		ZLLogger::Instance().println("DocPlugin", "problems with reading float images");
+		return;
+	}
+
+	RecordHeader header;
+	for (unsigned int consumed = 0; consumed < myLength; ) {
+		consumed += readRecordHeader(header, myTableStream);
+		if (header.type == 0xF000) {
+			consumed += readDggContainer(myItem, header.length, myTableStream, myMainStream);
+		} else if (header.type == 0xF002) {
+			consumed += readDgContainer(myItem, header.length, myTableStream);
+		} else {
+			//unknown top-level record: give up
+			return;
+		}
+	}
+}
+
+// Returns the image blocks of the blip referenced by the shape `shapeId`.
+// The first shape container with that id is used; within it the first
+// non-complex property 0x0104 whose value is a valid 1-based blip index wins.
+// An empty block list is returned when nothing matches.
+ZLFileImage::Blocks DocFloatImageReader::getBlocksForShapeId(unsigned int shapeId) const {
+	const FSPContainer *shape = 0;
+	for (std::size_t i = 0; i < myItem.FSPs.size(); ++i) {
+		if (myItem.FSPs.at(i).fsp.shapeId == shapeId) {
+			shape = &myItem.FSPs.at(i);
+			break;
+		}
+	}
+	if (shape == 0 || shape->fopte.empty()) {
+		return ZLFileImage::Blocks();
+	}
+
+	for (std::size_t i = 0; i < shape->fopte.size(); ++i) {
+		const FOPTE &property = shape->fopte.at(i);
+		//0x0104 specifies the BLIP, see p.420 [MS-ODRAW]
+		if (property.pId != 0x0104 || property.isComplex) {
+			continue;
+		}
+		//the value is a 1-based index into the blip list
+		if (property.value == 0 || property.value > myItem.blips.size()) {
+			continue;
+		}
+		return myItem.blips.at(property.value - 1).blocks;
+	}
+	return ZLFileImage::Blocks();
+}
+
+// Reads one 8-byte OfficeArtRecordHeader from `stream` into `header`.
+// Always returns 8 so that the callers' byte accounting keeps advancing.
+// If fewer than 8 bytes could be read, `header` is set to all zeros instead
+// of being decoded from an uninitialised buffer; record type 0 is not handled
+// specially by any caller visible here, so such a record is treated as
+// unknown.
+unsigned int DocFloatImageReader::readRecordHeader(RecordHeader &header, shared_ptr<OleStream> stream) {
+	//OfficeArtRecordHeader structure is described at p.26 [MS-ODRAW]
+	char buffer[8] = { 0 };
+	if (stream->read(buffer, 8) != 8) {
+		header.version = 0;
+		header.instance = 0;
+		header.type = 0;
+		header.length = 0;
+		return 8;
+	}
+	//first 16 bits: low 4 bits are the version, the remaining 12 the instance
+	unsigned int temp = OleUtil::getU2Bytes(buffer, 0);
+	header.version = temp & 0x000F;
+	header.instance = temp >> 4;
+	header.type = OleUtil::getU2Bytes(buffer, 2);
+	header.length = OleUtil::getU4Bytes(buffer, 4);
+	return 8;
+}
+
+// Reads an OfficeArtDggContainer of `length` bytes: BStore containers
+// (0xF001) are parsed into `item`, every other child record is skipped.
+// One extra byte after the container is skipped as well.
+// Returns the number of bytes accounted for, including that extra byte.
+unsigned int DocFloatImageReader::readDggContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream) {
+	//OfficeArtDggContainer structure is described at p.50 [MS-ODRAW]
+	unsigned int consumed = 0;
+	RecordHeader header;
+
+	while (consumed < length) {
+		consumed += readRecordHeader(header, stream);
+		if (header.type == 0xF001) {
+			consumed += readBStoreContainer(item, header.length, stream, mainStream);
+		} else {
+			consumed += skipRecord(header, stream);
+		}
+	}
+
+	stream->seek(1, false); //skipping dgglbl (see p.406 [MS-DOC])
+	return consumed + 1;
+}
+
+// Reads an OfficeArtBStoreContainer of `length` bytes. Every file block
+// record (0xF007) is parsed into a Blip that is appended to item.blips;
+// other records are skipped. Returns the number of bytes accounted for.
+unsigned int DocFloatImageReader::readBStoreContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream) {
+	//OfficeArtBStoreContainer structure is described at p.58 [MS-ODRAW]
+	unsigned int consumed = 0;
+	RecordHeader header;
+	while (consumed < length) {
+		consumed += readRecordHeader(header, stream);
+		if (header.type != 0xF007) {
+			consumed += skipRecord(header, stream);
+			continue;
+		}
+		Blip blip;
+		consumed += readBStoreContainerFileBlock(blip, stream, mainStream);
+		item.blips.push_back(blip);
+	}
+	return consumed;
+}
+
+// Skips the body of the record described by `header` with a relative seek of
+// header.length bytes and returns that length. The result of the seek is not
+// checked.
+unsigned int DocFloatImageReader::skipRecord(const RecordHeader &header, shared_ptr<OleStream> stream) {
+ stream->seek(header.length, false);
+ return header.length;
+}
+
+// Reads one OfficeArtBStoreContainerFileBlock: the FBSE entry comes from
+// `stream` (the table stream), the blip record it points to is read from
+// `mainStream`. Supported raster formats get their block list stored in
+// `blip`; metafile formats are skipped. blip.type receives the record type.
+// Returns the number of bytes consumed from `stream` only.
+unsigned int DocFloatImageReader::readBStoreContainerFileBlock(Blip &blip, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream) {
+	//OfficeArtBStoreContainerFileBlock structure is described at p.59 [MS-ODRAW]
+	unsigned int count = readFBSE(blip.storeEntry, stream);
+	if (blip.storeEntry.offsetInDelay != (unsigned int)-1) {
+		//seek() reports success with true, so the error path is taken on failure only
+		if (!mainStream->seek(blip.storeEntry.offsetInDelay, true)) { //see p.70 [MS-ODRAW]
+			//TODO maybe we should stop reading float images here
+			ZLLogger::Instance().println("DocPlugin", "DocFloatImageReader: problems with seeking for offset");
+			return count;
+		}
+	}
+	//NOTE(review): when offsetInDelay is -1 the blip header is read from the
+	//current position of mainStream - confirm that this is intended
+	RecordHeader header;
+	readRecordHeader(header, mainStream);
+	switch (header.type) {
+		case OleMainStream::IMAGE_WMF:
+		case OleMainStream::IMAGE_EMF:
+		case OleMainStream::IMAGE_PICT:
+			skipRecord(header, mainStream);
+			break;
+		case OleMainStream::IMAGE_JPEG:
+		case OleMainStream::IMAGE_JPEG2:
+		case OleMainStream::IMAGE_PNG:
+		case OleMainStream::IMAGE_DIB:
+		case OleMainStream::IMAGE_TIFF:
+			readBlip(blip, header, mainStream);
+			break;
+	}
+	blip.type = header.type;
+	//bytes read from mainStream are deliberately not part of the result
+	return count;
+}
+
+// Reads an OfficeArtBlip record body from `stream`: skips one or two 16-byte
+// UIDs (two when the record instance says so) and the tag byte, then stores
+// the location of the remaining image bytes in blip.blocks.
+// Each image type is now checked against its own instance value only (the
+// DIB case used to fall through into the TIFF check), and the payload length
+// is no longer computed when the record is shorter than its fixed part.
+unsigned int DocFloatImageReader::readBlip(Blip &blip, const RecordHeader &header, shared_ptr<OleStream> stream) {
+	//OfficeArtBlip structure is described at p.60-66 [MS-ODRAW]
+	stream->seek(16, false); //skipping rgbUid1
+	unsigned int count = 16;
+
+	bool addField = false;
+	switch (header.type) {
+		case OleMainStream::IMAGE_PNG:
+			addField = header.instance == 0x6E1;
+			break;
+		case OleMainStream::IMAGE_JPEG:
+		case OleMainStream::IMAGE_JPEG2:
+			addField = header.instance == 0x46B || header.instance == 0x6E3;
+			break;
+		case OleMainStream::IMAGE_DIB:
+			addField = header.instance == 0x7A9;
+			break;
+		case OleMainStream::IMAGE_TIFF:
+			addField = header.instance == 0x6E5;
+			break;
+	}
+
+	if (addField) {
+		stream->seek(16, false); //skipping rgbUid2
+		count += 16;
+	}
+	stream->seek(1, false); //skipping tag
+	count += 1;
+
+	//header.length - count is unsigned: only subtract when there is payload left
+	if (header.length > count) {
+		blip.blocks = stream->getBlockPieceInfoList(stream->offset(), header.length - count);
+	}
+	//NOTE(review): this adds the whole record length on top of the bytes already
+	//counted; kept as is, the only caller visible here ignores the result
+	count += header.length;
+	return count;
+}
+
+// Reads an OfficeArtFBSE entry from `stream`, keeping only size, reference
+// count and delay-stream offset; all other fields (and the optional name)
+// are skipped. Returns 36 plus the name length read from the entry.
+unsigned int DocFloatImageReader::readFBSE(BlipStoreEntry &fbse, shared_ptr<OleStream> stream) {
+	//OfficeArtFBSE structure is described at p.68 [MS-ODRAW]
+	const unsigned int fixedPartSize = 36;
+
+	stream->seek(2, false); //skipping btWin32 and btMacOS
+	stream->seek(16, false); //skipping rgbUid
+	stream->seek(2, false); //skipping tag
+
+	fbse.size = read4Bytes(stream);
+	fbse.referenceCount = read4Bytes(stream);
+	fbse.offsetInDelay = read4Bytes(stream);
+
+	stream->seek(1, false); //skipping unused value
+	const unsigned int nameLength = read1Byte(stream); //if it should be multiplied on 2?
+	stream->seek(2, false); // skipping unused values
+
+	if (nameLength > 0) {
+		stream->seek(nameLength, false); //skipping nameData
+	}
+	return fixedPartSize + nameLength;
+}
+
+// Reads an OfficeArtDgContainer of `length` bytes. Shape group containers
+// (0xF003) are descended into, shape containers (0xF004) are parsed and
+// appended to item.FSPs, the FDG record (0xF008) is skipped as 8 bytes and
+// everything else is skipped by its declared length.
+// Returns the number of bytes accounted for.
+unsigned int DocFloatImageReader::readDgContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream) {
+	//OfficeArtDgContainer structure is described at p.52 [MS-ODRAW]
+	RecordHeader header;
+	unsigned int consumed = 0;
+	while (consumed < length) {
+		consumed += readRecordHeader(header, stream);
+		if (header.type == 0xF008) {
+			//skip OfficeArtFDG record, p. 82 [MS-ODRAW]
+			stream->seek(8, false);
+			consumed += 8;
+		} else if (header.type == 0xF003) {
+			consumed += readSpgrContainer(item, header.length, stream);
+		} else if (header.type == 0xF004) {
+			FSPContainer shape;
+			consumed += readSpContainter(shape, header.length, stream);
+			item.FSPs.push_back(shape);
+		} else {
+			consumed += skipRecord(header, stream);
+		}
+	}
+	return consumed;
+}
+
+// Reads an OfficeArtSpgrContainer of `length` bytes. Nested group containers
+// (0xF003) are read recursively, shape containers (0xF004) are parsed and
+// appended to item.FSPs, other records are skipped.
+// Returns the number of bytes accounted for.
+unsigned int DocFloatImageReader::readSpgrContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream) {
+	//OfficeArtSpgrContainer structure is described at p.56 [MS-ODRAW]
+	RecordHeader header;
+	unsigned int consumed = 0;
+	while (consumed < length) {
+		consumed += readRecordHeader(header, stream);
+		if (header.type == 0xF003) {
+			consumed += readSpgrContainer(item, header.length, stream);
+		} else if (header.type == 0xF004) {
+			FSPContainer shape;
+			consumed += readSpContainter(shape, header.length, stream);
+			item.FSPs.push_back(shape);
+		} else {
+			consumed += skipRecord(header, stream);
+		}
+	}
+	return consumed;
+}
+
+// Reads an OfficeArtSpContainer of `length` bytes into `item`: the shape id
+// comes from the FSP record (0xF00A), the property list from the FOPT record
+// (0xF00B). Fixed-size records are skipped by their known size, records
+// 0xF00C / 0xF11D / 0xF11F contribute only their header, anything else is
+// skipped by its declared length. Returns the number of bytes accounted for.
+unsigned int DocFloatImageReader::readSpContainter(FSPContainer &item, unsigned int length, shared_ptr<OleStream> stream) {
+	//OfficeArtSpContainter structure is described at p.53-55 [MS-ODRAW]
+	unsigned int consumed = 0;
+	RecordHeader header;
+	while (consumed < length) {
+		consumed += readRecordHeader(header, stream);
+		switch (header.type) {
+			case 0xF00A:
+				consumed += readFSP(item.fsp, stream);
+				break;
+			case 0xF00B:
+				consumed += readArrayFOPTE(item.fopte, header.length, stream);
+				break;
+			case 0xF009: //skip OfficeArtFSPGR record, p.74 [MS-ODRAW]
+				stream->seek(16, false);
+				consumed += 16;
+				break;
+			case 0xF00E: //OfficeArtAnchor
+			case 0xF00F: //OfficeArtChildAnchor, p.75 [MS-ODRAW]
+			case 0xF010: //OfficeArtClientAnchor
+				stream->seek(4, false);
+				consumed += 4;
+				break;
+			case 0xF00C:
+			case 0xF11D:
+			case 0xF11F:
+				//only the header is consumed for these records
+				break;
+			default:
+				consumed += skipRecord(header, stream);
+				break;
+		}
+	}
+	return consumed;
+}
+
+// Reads the 4-byte shape id of an OfficeArtFSP record into fsp.shapeId and
+// skips the following 4 bytes. Returns 8, the number of bytes consumed.
+unsigned int DocFloatImageReader::readFSP(FSP &fsp, shared_ptr<OleStream> stream) {
+ //OfficeArtFSP structure is described at p.76 [MS-ODRAW]
+ fsp.shapeId = read4Bytes(stream);
+ stream->seek(4, false);
+ return 8;
+}
+
+// Reads property entries into `fopteArray` until `length` bytes have been
+// counted, then skips the complex data announced by every entry of the array
+// whose isComplex flag is set. Returns the number of bytes accounted for.
+unsigned int DocFloatImageReader::readArrayFOPTE(std::vector<FOPTE> &fopteArray,unsigned int length, shared_ptr<OleStream> stream) {
+	//OfficeArtRGFOPTE structure is described at p.98 [MS-ODRAW]
+	unsigned int consumed = 0;
+	for (; consumed < length; ) {
+		FOPTE entry;
+		consumed += readFOPTE(entry, stream);
+		fopteArray.push_back(entry);
+	}
+
+	//for complex entries `value` is the size of the data to skip
+	for (std::vector<FOPTE>::const_iterator it = fopteArray.begin(); it != fopteArray.end(); ++it) {
+		if (!it->isComplex) {
+			continue;
+		}
+		stream->seek(it->value, false);
+		consumed += it->value;
+	}
+	return consumed;
+}
+
+// Reads one 6-byte property entry: a 16-bit id word followed by a 32-bit
+// value. The low 14 bits of the id word are the property id, bit 14 is the
+// blip-id flag and bit 15 the complex flag. Returns 6.
+unsigned int DocFloatImageReader::readFOPTE(FOPTE &fopte, shared_ptr<OleStream> stream) {
+	//OfficeArtFOPTE structure is described at p.32 [MS-ODRAW]
+	const unsigned int idWord = read2Bytes(stream);
+	fopte.pId = idWord & 0x3fff;
+	fopte.isBlipId = (idWord & 0x4000) != 0;
+	fopte.isComplex = (idWord & 0x8000) != 0;
+	fopte.value = read4Bytes(stream);
+	return 6;
+}
+
+// Reads one byte from `stream` and returns it as an unsigned value;
+// returns 0 when the byte cannot be read.
+unsigned int DocFloatImageReader::read1Byte(shared_ptr<OleStream> stream) {
+	char raw[1];
+	const bool complete = stream->read(raw, 1) == 1;
+	if (complete) {
+		return OleUtil::getU1Byte(raw, 0);
+	}
+	return 0;
+}
+
+// Reads two bytes from `stream` and decodes them with OleUtil::getU2Bytes;
+// returns 0 when fewer than two bytes can be read.
+unsigned int DocFloatImageReader::read2Bytes(shared_ptr<OleStream> stream) {
+	char raw[2];
+	const bool complete = stream->read(raw, 2) == 2;
+	if (complete) {
+		return OleUtil::getU2Bytes(raw, 0);
+	}
+	return 0;
+}
+
+// Reads four bytes from `stream` and decodes them with OleUtil::getU4Bytes;
+// returns 0 when fewer than four bytes can be read.
+unsigned int DocFloatImageReader::read4Bytes(shared_ptr<OleStream> stream) {
+	char raw[4];
+	const bool complete = stream->read(raw, 4) == 4;
+	if (complete) {
+		return OleUtil::getU4Bytes(raw, 0);
+	}
+	return 0;
+}
diff --git a/fbreader/src/formats/doc/DocFloatImageReader.h b/fbreader/src/formats/doc/DocFloatImageReader.h
new file mode 100644
index 0000000..d2d6c2e
--- /dev/null
+++ b/fbreader/src/formats/doc/DocFloatImageReader.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCFLOATIMAGEREADER_H__
+#define __DOCFLOATIMAGEREADER_H__
+
+#include <ZLFileImage.h>
+
+// Parses the OfficeArt (drawing) data of a Word document to locate floating
+// images: readAll() collects blips and shape containers, and
+// getBlocksForShapeId() maps a shape id to the blocks of its image.
+// NOTE(review): this header uses std::vector, shared_ptr and OleStream but
+// includes only ZLFileImage.h - it appears to rely on the including file to
+// provide the rest; confirm.
+class DocFloatImageReader {
+
+public:
+ struct BlipStoreEntry { // see p.68 [MS-ODRAW]
+ unsigned int size; // size of blip in stream
+ unsigned int referenceCount; // (cRef) reference count for the blip
+ unsigned int offsetInDelay; // foDelay, file offset in the delay stream
+ };
+
+ struct Blip { //see p.59, p63-66 [MS-ODRAW]
+ BlipStoreEntry storeEntry;
+ unsigned int type; // record type of the blip record
+ ZLFileImage::Blocks blocks; // location of the image bytes
+ };
+
+ struct FSP { //see p.76-77 [MS-ODRAW]
+ unsigned int shapeId; //spid
+ };
+
+ struct FOPTE { //see p.98 and p.32 [MS-ODRAW]
+ unsigned int pId; //pid
+ bool isBlipId; //fBid
+ bool isComplex; //fComplex
+ unsigned int value; //op
+ };
+
+ struct FSPContainer { //see p.53-55 [MS-ODRAW]
+ FSP fsp;
+ std::vector<FOPTE> fopte;
+ };
+
+ struct OfficeArtContent { //see p.405-406 [MS-DOC]
+ std::vector<Blip> blips; //retrieved from OfficeArtDggContainer
+ std::vector<FSPContainer> FSPs; //retrieved from OfficeArtDgContainer
+ };
+
+ struct RecordHeader { //see p.26 [MS-ODRAW]
+ unsigned int version;
+ unsigned int instance;
+ unsigned int type;
+ unsigned int length;
+ };
+
+public:
+ // `off` and `len` give the position and size of the OfficeArt data inside
+ // `tableStream`; blip records are read from `mainStream`.
+ DocFloatImageReader(unsigned int off, unsigned int len, shared_ptr<OleStream> tableStream, shared_ptr<OleStream> mainStream);
+
+public:
+ // Parses the OfficeArt data and fills the internal content description.
+ void readAll();
+
+ // Returns the image blocks for the shape with the given id, or an empty
+ // block list when no matching shape/blip is known.
+ ZLFileImage::Blocks getBlocksForShapeId(unsigned int shapeId) const;
+
+private:
+ // The read* helpers below return the number of bytes they account for.
+ static unsigned int readRecordHeader(RecordHeader &header, shared_ptr<OleStream> stream);
+ static unsigned int readDggContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream);
+
+ static unsigned int readBStoreContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream);
+ static unsigned int readBStoreContainerFileBlock(Blip &blip, shared_ptr<OleStream> stream, shared_ptr<OleStream> mainStream);
+ static unsigned int readBlip(Blip &blip, const RecordHeader &header, shared_ptr<OleStream> stream);
+ static unsigned int readFBSE(BlipStoreEntry &fbse, shared_ptr<OleStream> stream);
+
+ static unsigned int readFOPTE(FOPTE &fopte, shared_ptr<OleStream> stream);
+ static unsigned int readArrayFOPTE(std::vector<FOPTE> &fopte, unsigned int length, shared_ptr<OleStream> stream);
+ static unsigned int readFSP(FSP &fsp, shared_ptr<OleStream> stream);
+ static unsigned int readSpContainter(FSPContainer &item, unsigned int length, shared_ptr<OleStream> stream);
+ static unsigned int readSpgrContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream);
+ static unsigned int readDgContainer(OfficeArtContent &item, unsigned int length, shared_ptr<OleStream> stream);
+
+ // Seeks past the body of the record described by `header`.
+ static unsigned int skipRecord(const RecordHeader &header, shared_ptr<OleStream> stream);
+
+ // Read a 1/2/4-byte unsigned value; each returns 0 on a short read.
+ static unsigned int read1Byte(shared_ptr<OleStream> stream);
+ static unsigned int read2Bytes(shared_ptr<OleStream> stream);
+ static unsigned int read4Bytes(shared_ptr<OleStream> stream);
+
+private:
+ shared_ptr<OleStream> myTableStream;
+ shared_ptr<OleStream> myMainStream;
+ unsigned int myOffset;
+ unsigned int myLength;
+
+ // filled by readAll(), queried by getBlocksForShapeId()
+ OfficeArtContent myItem;
+};
+
+#endif /* __DOCFLOATIMAGEREADER_H__ */
diff --git a/fbreader/src/formats/doc/DocInlineImageReader.cpp b/fbreader/src/formats/doc/DocInlineImageReader.cpp
new file mode 100644
index 0000000..69ce74f
--- /dev/null
+++ b/fbreader/src/formats/doc/DocInlineImageReader.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include "OleUtil.h"
+#include "OleMainStream.h"
+
+#include "DocInlineImageReader.h"
+
+// Remembers the stream that getImagePieceInfo() reads picture records from.
+DocInlineImageReader::DocInlineImageReader(shared_ptr<OleStream> dataStream) : myDataStream(dataStream) {}
+
+// Locates the payload of the inline picture whose PICF structure starts at
+// dataPos in the data stream.
+//
+// Returns the result of OleStream::getBlockPieceInfoList() for the payload, or
+// an empty block list when the stream is missing, a checked read/seek fails,
+// the picture is not stored inline (format type other than 0x0064), the record
+// kind is not supported (EMF/WMF/PICT, 0xF00C, unknown types) or the lengths
+// found in the structure contradict each other.
+ZLFileImage::Blocks DocInlineImageReader::getImagePieceInfo(unsigned int dataPos) {
+	if (myDataStream.isNull()) {
+		return ZLFileImage::Blocks();
+	}
+	if (!myDataStream->seek(dataPos, true)) {
+		return ZLFileImage::Blocks();
+	}
+
+	//reading PICF structure (see p. 421 [MS-DOC])
+	//const, so that headerBuffer is an ordinary array and not a compiler-specific variable length array
+	const unsigned int picfHeaderSize = 4 + 2 + 8; //record length, headerLength and storage format
+	char headerBuffer[picfHeaderSize];
+	if (myDataStream->read(headerBuffer, picfHeaderSize) != picfHeaderSize) {
+		return ZLFileImage::Blocks();
+	}
+	const unsigned int length = OleUtil::getU4Bytes(headerBuffer, 0);
+	const unsigned int headerLength = OleUtil::getU2Bytes(headerBuffer, 4);
+	const unsigned int formatType = OleUtil::getU2Bytes(headerBuffer, 6);
+
+	if (formatType != 0x0064) { //external link to some file; see p.394 [MS-DOC]
+		//TODO implement
+		return ZLFileImage::Blocks();
+	}
+	//the header can neither be shorter than the part already read nor fill the whole record;
+	//the first test also keeps the unsigned subtraction below from wrapping around
+	if (headerLength < picfHeaderSize || headerLength >= length) {
+		return ZLFileImage::Blocks();
+	}
+
+	//reading OfficeArtInlineSpContainer structure; see p.421 [MS-DOC] and p.56 [MS-ODRAW]
+	if (!myDataStream->seek(headerLength - picfHeaderSize, false)) { //skip header
+		return ZLFileImage::Blocks();
+	}
+
+	char buffer[8]; //for OfficeArtRecordHeader structure; see p.69 [MS-ODRAW]
+	bool found = false;
+	//offset of the current position relative to dataPos; the loop increment accounts
+	//for the 8-byte record header, the cases below for everything skipped after it
+	unsigned int curOffset = 0;
+	for (curOffset = headerLength; !found && curOffset + 8 <= length; curOffset += 8) {
+		if (myDataStream->read(buffer, 8) != 8) {
+			return ZLFileImage::Blocks();
+		}
+		const unsigned int recordInstance = OleUtil::getU2Bytes(buffer, 0) >> 4;
+		const unsigned int recordType = OleUtil::getU2Bytes(buffer, 2);
+		const unsigned int recordLen = OleUtil::getU4Bytes(buffer, 4);
+
+		switch (recordType) {
+			case 0xF000: case 0xF001: case 0xF002: case 0xF003: case 0xF004: case 0xF005:
+				//nothing is skipped: the next record header is read right after this one
+				break;
+			case 0xF007:
+			{
+				myDataStream->seek(33, false);
+				char tmpBuf[1];
+				if (myDataStream->read(tmpBuf, 1) != 1) {
+					//otherwise the name length would be taken from an uninitialized byte
+					return ZLFileImage::Blocks();
+				}
+				const unsigned int nameLength = OleUtil::getU1Byte(tmpBuf, 0);
+				myDataStream->seek(nameLength * 2 + 2, false);
+				curOffset += 33 + 1 + nameLength * 2 + 2;
+			}
+				break;
+			case 0xF008:
+				myDataStream->seek(8, false);
+				curOffset += 8;
+				break;
+			case 0xF009:
+				myDataStream->seek(16, false);
+				curOffset += 16;
+				break;
+			case 0xF006: case 0xF00A: case 0xF00B: case 0xF00D: case 0xF00E: case 0xF00F: case 0xF010: case 0xF011: case 0xF122:
+				//records without interest for pictures are skipped as a whole
+				myDataStream->seek(recordLen, false);
+				curOffset += recordLen;
+				break;
+			case OleMainStream::IMAGE_EMF:
+			case OleMainStream::IMAGE_WMF:
+			case OleMainStream::IMAGE_PICT:
+				//TODO implement
+				return ZLFileImage::Blocks();
+			//for the supported picture kinds a fixed 17-byte prefix is skipped, plus
+			//16 more bytes for the recordInstance values listed in each case
+			case OleMainStream::IMAGE_JPEG:
+			case OleMainStream::IMAGE_JPEG2:
+				myDataStream->seek(17, false);
+				curOffset += 17;
+				if (recordInstance == 0x46B || recordInstance == 0x6E3) {
+					myDataStream->seek(16, false);
+					curOffset += 16;
+				}
+				found = true;
+				break;
+			case OleMainStream::IMAGE_PNG:
+				myDataStream->seek(17, false);
+				curOffset += 17;
+				if (recordInstance == 0x6E1) {
+					myDataStream->seek(16, false);
+					curOffset += 16;
+				}
+				found = true;
+				break;
+			case OleMainStream::IMAGE_DIB: // DIB = BMP without 14-bytes header
+				myDataStream->seek(17, false);
+				curOffset += 17;
+				if (recordInstance == 0x7A9) {
+					myDataStream->seek(16, false);
+					curOffset += 16;
+				}
+				found = true;
+				break;
+			case OleMainStream::IMAGE_TIFF:
+				myDataStream->seek(17, false);
+				curOffset += 17;
+				if (recordInstance == 0x6E5) {
+					myDataStream->seek(16, false);
+					curOffset += 16;
+				}
+				found = true;
+				break;
+			case 0xF00C:
+			default:
+				return ZLFileImage::Blocks();
+		}
+	}
+
+	if (!found) {
+		return ZLFileImage::Blocks();
+	}
+	if (curOffset > length) {
+		//the skipped prefixes run past the end of the record: a truncated or broken
+		//structure, and length - curOffset would wrap around to a huge size
+		return ZLFileImage::Blocks();
+	}
+	return myDataStream->getBlockPieceInfoList(dataPos + curOffset, length - curOffset);
+}
diff --git a/fbreader/src/formats/doc/DocInlineImageReader.h b/fbreader/src/formats/doc/DocInlineImageReader.h
new file mode 100644
index 0000000..9dab9ae
--- /dev/null
+++ b/fbreader/src/formats/doc/DocInlineImageReader.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCINLINEIMAGEREADER_H__
+#define __DOCINLINEIMAGEREADER_H__
+
+#include <vector>
+
+#include "OleStream.h"
+
+// Finds the data of a picture stored inline in an OLE stream.
+class DocInlineImageReader {
+
+public:
+ // dataStream is the stream the picture structures are read from; it is kept
+ // for later use and not accessed by the constructor.
+ DocInlineImageReader(shared_ptr<OleStream> dataStream);
+ // dataPos is the position inside the stream where the description of one
+ // picture starts; the result lists the blocks that hold the picture data.
+ ZLFileImage::Blocks getImagePieceInfo(unsigned int dataPos);
+
+private:
+ shared_ptr<OleStream> myDataStream;
+};
+
+#endif /* __DOCINLINEIMAGEREADER_H__ */
diff --git a/fbreader/src/formats/doc/DocMetaInfoReader.cpp b/fbreader/src/formats/doc/DocMetaInfoReader.cpp
new file mode 100644
index 0000000..37b39c2
--- /dev/null
+++ b/fbreader/src/formats/doc/DocMetaInfoReader.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLInputStream.h>
+
+#include "../../library/Book.h"
+
+#include "DocMetaInfoReader.h"
+
+// Binds the reader to a book and wipes the book's authors, title, language
+// and tags.
+DocMetaInfoReader::DocMetaInfoReader(Book &book) : myBook(book) {
+	const std::string empty;
+	myBook.removeAllAuthors();
+	myBook.setTitle(empty);
+	myBook.setLanguage(empty);
+	myBook.removeAllTags();
+}
+
+// Fills in the meta information of the book: authors and tags are removed and
+// the title becomes the result of file().name(true). Always returns true.
+bool DocMetaInfoReader::readMetaInfo() {
+	myBook.removeAllAuthors();
+	const std::string title = myBook.file().name(true);
+	myBook.setTitle(title);
+	myBook.removeAllTags();
+	return true;
+}
diff --git a/fbreader/src/formats/doc/DocMetaInfoReader.h b/fbreader/src/formats/doc/DocMetaInfoReader.h
new file mode 100644
index 0000000..db26d29
--- /dev/null
+++ b/fbreader/src/formats/doc/DocMetaInfoReader.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCMETAINFOREADER_H__
+#define __DOCMETAINFOREADER_H__
+
+#include <string>
+
+class Book;
+
+// Provides the meta information (title, authors, tags, language) of a book
+// stored in a .doc file.
+class DocMetaInfoReader {
+
+public:
+ // book is held by reference, so it has to outlive the reader.
+ DocMetaInfoReader(Book &book);
+ ~DocMetaInfoReader();
+ // Updates the book passed to the constructor; the result tells whether the
+ // meta information could be read.
+ bool readMetaInfo();
+
+ /*
+ void startElementHandler(int tag, const char **attributes);
+ void endElementHandler(int tag);
+ void characterDataHandler(const char *text, std::size_t len);
+ */
+
+private:
+ Book &myBook;
+};
+
+// Nothing to release: the book is only referenced.
+inline DocMetaInfoReader::~DocMetaInfoReader() {}
+
+#endif /* __DOCMETAINFOREADER_H__ */
diff --git a/fbreader/src/formats/doc/DocPlugin.cpp b/fbreader/src/formats/doc/DocPlugin.cpp
new file mode 100644
index 0000000..ef6f511
--- /dev/null
+++ b/fbreader/src/formats/doc/DocPlugin.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+#include <ZLLogger.h>
+#include <ZLImage.h>
+#include <ZLEncodingConverter.h>
+
+#include "DocPlugin.h"
+#include "DocMetaInfoReader.h"
+#include "DocBookReader.h"
+#include "DocStreams.h"
+#include "../../bookmodel/BookModel.h"
+#include "../../library/Book.h"
+
+// The plugin has no state of its own, so construction and destruction do nothing.
+DocPlugin::DocPlugin() {}
+
+DocPlugin::~DocPlugin() {}
+
+// Always true for this plugin.
+bool DocPlugin::providesMetaInfo() const {
+	return true;
+}
+
+// Name of the file type handled by this plugin.
+const std::string DocPlugin::supportedFileType() const {
+	return "doc";
+}
+
+// A file is accepted when its extension is exactly "doc".
+bool DocPlugin::acceptsFile(const ZLFile &file) const {
+	const bool isDoc = file.extension() == "doc";
+	return isDoc;
+}
+
+// Reads the meta information of the book and then tries to detect its
+// encoding and language from a sample of the document text.
+//
+// Returns false only if DocMetaInfoReader fails; the outcome of the detection
+// itself does not influence the result.
+bool DocPlugin::readMetaInfo(Book &book) const {
+	if (!DocMetaInfoReader(book).readMetaInfo()) {
+		return false;
+	}
+
+	//size limit handed to the text streams used for detection
+	const std::size_t sampleSize = 50000;
+
+	shared_ptr<ZLInputStream> stream = new DocAnsiStream(book.file(), sampleSize);
+	if (detectEncodingAndLanguage(book, *stream)) {
+		return true;
+	}
+
+	//second attempt: the stream that delivers the UCS-2 text converted to UTF-8
+	stream = new DocUcs2Stream(book.file(), sampleSize);
+	detectLanguage(book, *stream, ZLEncodingConverter::UTF8, true);
+	return true;
+}
+
+// Does nothing and reports success.
+bool DocPlugin::readLanguageAndEncoding(Book &/*book*/) const {
+	return true;
+}
+
+// Builds the book model with a DocBookReader that is given the encoding stored
+// in the book; returns what DocBookReader::readBook() returns.
+bool DocPlugin::readModel(BookModel &model) const {
+	DocBookReader reader(model, model.book()->encoding());
+	return reader.readBook();
+}
diff --git a/fbreader/src/formats/doc/DocPlugin.h b/fbreader/src/formats/doc/DocPlugin.h
new file mode 100644
index 0000000..93b1803
--- /dev/null
+++ b/fbreader/src/formats/doc/DocPlugin.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCPLUGIN_H__
+#define __DOCPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+// Format plugin for files with the "doc" extension.
+class DocPlugin : public FormatPlugin {
+
+public:
+ DocPlugin();
+ ~DocPlugin();
+ bool providesMetaInfo() const;
+
+ // Name of the handled file type.
+ const std::string supportedFileType() const;
+ // Tells whether the plugin is willing to handle the given file.
+ bool acceptsFile(const ZLFile &file) const;
+ // Fills in the meta information of the book.
+ bool readMetaInfo(Book &book) const;
+ bool readLanguageAndEncoding(Book &book) const;
+ // Reads the content of the book that belongs to the model.
+ bool readModel(BookModel &model) const;
+};
+
+#endif /* __DOCPLUGIN_H__ */
diff --git a/fbreader/src/formats/doc/DocStreams.cpp b/fbreader/src/formats/doc/DocStreams.cpp
new file mode 100644
index 0000000..b21e15a
--- /dev/null
+++ b/fbreader/src/formats/doc/DocStreams.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+#include <cstdlib>
+#include <string>
+
+#include "DocStreams.h"
+#include "OleStreamReader.h"
+
+// Base of the readers that collect document text into a caller-provided
+// buffer. All handlers defined here are empty; subclasses override the one
+// whose data they want to store.
+class DocReader : public OleStreamReader {
+
+public:
+ // buffer is where subclasses store data, maxSize is its capacity in bytes.
+ // The buffer is not owned by the reader.
+ DocReader(char *buffer, std::size_t maxSize);
+ ~DocReader();
+ // Number of bytes stored in the buffer so far.
+ std::size_t readSize() const;
+
+private:
+ // Reads pieces of the stream until the buffer is full or no piece is left.
+ bool readStream(OleMainStream &stream);
+ void ansiDataHandler(const char *buffer, std::size_t len);
+ void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
+ void footnotesStartHandler();
+
+protected:
+ char *myBuffer;
+ const std::size_t myMaxSize;
+ std::size_t myActualSize;
+};
+
+// DocReader that stores the data passed to ansiDataHandler().
+class DocAnsiReader : public DocReader {
+
+public:
+ DocAnsiReader(char *buffer, std::size_t maxSize);
+ ~DocAnsiReader();
+
+private:
+ // Appends the data to the buffer, as far as there is room for it.
+ void ansiDataHandler(const char *buffer, std::size_t len);
+};
+
+// DocReader that stores the symbols passed to ucs2SymbolHandler(), converted
+// to UTF-8.
+class DocUcs2Reader : public DocReader {
+
+public:
+ DocUcs2Reader(char *buffer, std::size_t maxSize);
+ ~DocUcs2Reader();
+
+private:
+ // Appends the UTF-8 form of the symbol to the buffer, as far as there is room for it.
+ void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol);
+};
+
+// Starts with an empty buffer: nothing has been stored yet.
+DocReader::DocReader(char *buffer, std::size_t maxSize) :
+	myBuffer(buffer),
+	myMaxSize(maxSize),
+	myActualSize(0) {
+}
+
+// The buffer is not released here.
+DocReader::~DocReader() {}
+
+// Reads piece after piece until the buffer is full or readNextPiece() reports
+// that it could not read another piece. Always returns true.
+bool DocReader::readStream(OleMainStream &stream) {
+	// TODO make 2 optimizations:
+	// 1) If another piece is too big, reading of the next piece can be stopped if some size parameter is specified
+	// (it can be transferred as a parameter (with default value 0, which means there is no need to use it) to the readNextPiece method)
+	// 2) We can specify as a parameter for readNextPiece what kind of piece should be read next (ANSI or not ANSI).
+	// As the type of piece is known already, it is not necessary to read other pieces.
+	while (myActualSize < myMaxSize && readNextPiece(stream)) {
+		//the handlers called by readNextPiece() do all the work
+	}
+	return true;
+}
+
+// The handlers of the base class ignore their data.
+void DocReader::ansiDataHandler(const char*, std::size_t) {}
+
+void DocReader::ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char) {}
+
+void DocReader::footnotesStartHandler() {}
+
+// Number of bytes stored in the buffer so far.
+std::size_t DocReader::readSize() const {
+	return myActualSize;
+}
+
+// Buffer and capacity are passed on to DocReader unchanged.
+DocAnsiReader::DocAnsiReader(char *buffer, std::size_t maxSize) : DocReader(buffer, maxSize) {}
+
+DocAnsiReader::~DocAnsiReader() {}
+
+// Appends up to dataLength bytes from buffer to the output buffer; bytes that
+// do not fit into the remaining space are dropped.
+void DocAnsiReader::ansiDataHandler(const char *buffer, std::size_t dataLength) {
+	if (myActualSize >= myMaxSize) {
+		return;
+	}
+	const std::size_t len = std::min(dataLength, myMaxSize - myActualSize);
+	if (len != 0) {
+		//the data is delimited by its length, not by a terminating zero: strncpy would
+		//stop at the first zero byte and fill the rest of the range with zeros
+		std::memcpy(myBuffer + myActualSize, buffer, len);
+		myActualSize += len;
+	}
+}
+
+// Buffer and capacity are passed on to DocReader unchanged.
+DocUcs2Reader::DocUcs2Reader(char *buffer, std::size_t maxSize) : DocReader(buffer, maxSize) {}
+
+DocUcs2Reader::~DocUcs2Reader() {}
+
+// Converts the symbol to UTF-8 and appends the result to the output buffer;
+// bytes that do not fit into the remaining space are dropped.
+void DocUcs2Reader::ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) {
+	if (myActualSize >= myMaxSize) {
+		return;
+	}
+	char buffer[4];
+	const std::size_t dataLength = ZLUnicodeUtil::ucs2ToUtf8(buffer, symbol);
+	const std::size_t len = std::min(dataLength, myMaxSize - myActualSize);
+	if (len != 0) {
+		//buffer holds dataLength bytes without a terminating zero, so it is copied
+		//as raw memory and not with a string function
+		std::memcpy(myBuffer + myActualSize, buffer, len);
+		myActualSize += len;
+	}
+}
+
+// Creates a closed stream over the text of the given file; maxSize limits the
+// number of bytes open() collects. The read position starts at 0, so that
+// offset() and read() see a defined value even before open() succeeds.
+DocStream::DocStream(const ZLFile& file, std::size_t maxSize) : myFile(file), myBuffer(0), mySize(maxSize), myOffset(0) {
+}
+
+// Releases the text buffer, if there is one.
+DocStream::~DocStream() {
+	close();
+}
+
+// Reads the text of the document into a freshly allocated buffer of at most
+// mySize bytes, using the reader supplied by createReader().
+//
+// On success mySize becomes the number of bytes actually collected and the
+// read position is 0. Returns false if the input stream of the file cannot be
+// opened or the reader fails to read the document.
+bool DocStream::open() {
+	//a buffer left over from an earlier open() would otherwise be leaked
+	delete[] myBuffer;
+	myBuffer = 0;
+	//defined position even if one of the steps below fails
+	myOffset = 0;
+
+	if (mySize != 0) {
+		myBuffer = new char[mySize];
+	}
+	shared_ptr<DocReader> reader = createReader(myBuffer, mySize);
+	shared_ptr<ZLInputStream> stream = myFile.inputStream();
+	if (stream.isNull() || !stream->open()) {
+		return false;
+	}
+	if (!reader->readDocument(stream, false)) {
+		return false;
+	}
+	mySize = reader->readSize();
+	return true;
+}
+
+// Copies up to maxSize bytes from the current position into buffer and moves
+// the position forward. With a null buffer (or no text buffer) nothing is
+// copied but the position still moves. Returns the number of bytes advanced.
+std::size_t DocStream::read(char *buffer, std::size_t maxSize) {
+	const std::size_t available = mySize - myOffset;
+	const std::size_t count = std::min(maxSize, available);
+	const bool canCopy = buffer != 0 && myBuffer != 0;
+	if (canCopy) {
+		std::memcpy(buffer, myBuffer + myOffset, count);
+	}
+	myOffset += count;
+	return count;
+}
+
+// Releases the text buffer; calling it again is harmless.
+void DocStream::close() {
+	//delete[] accepts a null pointer, so no test is needed
+	delete[] myBuffer;
+	myBuffer = 0;
+}
+
+// Moves the read position to offset, counted from the start of the text when
+// absoluteOffset is true and from the current position otherwise. The result
+// is clamped to the range [0, mySize].
+void DocStream::seek(int offset, bool absoluteOffset) {
+	int target = offset;
+	if (!absoluteOffset) {
+		target += myOffset;
+	}
+	if (target < 0) {
+		target = 0;
+	}
+	const std::size_t position = (std::size_t)target;
+	myOffset = position < mySize ? position : mySize;
+}
+
+// Current read position.
+std::size_t DocStream::offset() const { return myOffset; }
+
+// Value of mySize: the number of collected bytes once open() has succeeded.
+std::size_t DocStream::sizeOfOpened() { return mySize; }
+
+// Stream whose reader stores the data handed to ansiDataHandler().
+DocAnsiStream::DocAnsiStream(const ZLFile& file, std::size_t maxSize) : DocStream(file, maxSize) {}
+
+DocAnsiStream::~DocAnsiStream() {}
+
+// Supplies the reader used by DocStream::open().
+shared_ptr<DocReader> DocAnsiStream::createReader(char *buffer, std::size_t maxSize) {
+	shared_ptr<DocReader> reader = new DocAnsiReader(buffer, maxSize);
+	return reader;
+}
+
+// Stream whose reader stores the symbols handed to ucs2SymbolHandler() as UTF-8.
+DocUcs2Stream::DocUcs2Stream(const ZLFile& file, std::size_t maxSize) : DocStream(file, maxSize) {}
+
+DocUcs2Stream::~DocUcs2Stream() {}
+
+// Supplies the reader used by DocStream::open().
+shared_ptr<DocReader> DocUcs2Stream::createReader(char *buffer, std::size_t maxSize) {
+	shared_ptr<DocReader> reader = new DocUcs2Reader(buffer, maxSize);
+	return reader;
+}
diff --git a/fbreader/src/formats/doc/DocStreams.h b/fbreader/src/formats/doc/DocStreams.h
new file mode 100644
index 0000000..4b1538a
--- /dev/null
+++ b/fbreader/src/formats/doc/DocStreams.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __DOCSTREAMS_H__
+#define __DOCSTREAMS_H__
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+class DocReader;
+
+// Input stream that delivers text extracted from a .doc file. Which part of
+// the text is extracted is decided by the reader a subclass creates.
+class DocStream : public ZLInputStream {
+
+public:
+ // maxSize is the upper limit for the number of bytes kept by the stream.
+ DocStream(const ZLFile& file, std::size_t maxSize);
+ ~DocStream();
+
+private:
+ bool open();
+ std::size_t read(char *buffer, std::size_t maxSize);
+ void close();
+
+ void seek(int offset, bool absoluteOffset);
+ std::size_t offset() const;
+ std::size_t sizeOfOpened();
+
+protected:
+ // Creates the reader that fills buffer (of maxSize bytes) with text.
+ virtual shared_ptr<DocReader> createReader(char *buffer, std::size_t maxSize) = 0;
+
+private:
+ const ZLFile myFile;
+ // text buffer; 0 while no buffer is allocated
+ char *myBuffer;
+ // initially the limit given to the constructor
+ std::size_t mySize;
+ // current read position inside myBuffer
+ std::size_t myOffset;
+};
+
+// DocStream variant; the reader it uses is defined in DocStreams.cpp.
+class DocAnsiStream : public DocStream {
+
+public:
+ DocAnsiStream(const ZLFile& file, std::size_t maxSize);
+ ~DocAnsiStream();
+
+private:
+ // Implementation of the factory method declared in DocStream.
+ shared_ptr<DocReader> createReader(char *buffer, std::size_t maxSize);
+};
+
+// DocStream variant; the reader it uses is defined in DocStreams.cpp.
+class DocUcs2Stream : public DocStream {
+
+public:
+ DocUcs2Stream(const ZLFile& file, std::size_t maxSize);
+ ~DocUcs2Stream();
+
+private:
+ // Implementation of the factory method declared in DocStream.
+ shared_ptr<DocReader> createReader(char *buffer, std::size_t maxSize);
+};
+
+#endif /* __DOCSTREAMS_H__ */
diff --git a/fbreader/src/formats/doc/OleMainStream.cpp b/fbreader/src/formats/doc/OleMainStream.cpp
new file mode 100644
index 0000000..fe829e6
--- /dev/null
+++ b/fbreader/src/formats/doc/OleMainStream.cpp
@@ -0,0 +1,1085 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <string>
+
+#include <ZLLogger.h>
+#include <ZLUnicodeUtil.h>
+
+#include "OleUtil.h"
+#include "OleStorage.h"
+
+#include "DocInlineImageReader.h"
+
+#include "OleMainStream.h"
+
+// Default style: both style ids invalid, no page break, all indents 0,
+// default alignment.
+OleMainStream::Style::Style() :
+	StyleIdCurrent(STYLE_INVALID), StyleIdNext(STYLE_INVALID),
+	HasPageBreakBefore(false),
+	BeforeParagraphIndent(0), AfterParagraphIndent(0),
+	LeftIndent(0), FirstLineIndent(0), RightIndent(0),
+	Alignment(ALIGNMENT_DEFAULT) {
+}
+
+// Default character properties: regular font style, font size value 20.
+OleMainStream::CharInfo::CharInfo() :
+	FontStyle(FONT_REGULAR),
+	FontSize(20) {
+}
+
+// Default section: character position 0, marked as starting a new page.
+OleMainStream::SectionInfo::SectionInfo() :
+	CharPosition(0),
+	IsNewPage(true) {
+}
+
+// Default inline image info: data position 0.
+OleMainStream::InlineImageInfo::InlineImageInfo() :
+	DataPosition(0) {
+}
+
+// Default floating image info: shape id 0.
+OleMainStream::FloatImageInfo::FloatImageInfo() :
+	ShapeId(0) {
+}
+
+// All arguments are handed to OleStream; nothing is read until open() is called.
+OleMainStream::OleMainStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream) :
+	OleStream(storage, oleEntry, stream) {
+}
+
+// Opens the main stream and loads the structures needed to read its text.
+//
+// The first 768 bytes are read as the header and checked by readFIB(); then
+// the piece table is loaded from the "0Table" or "1Table" stream. If the table
+// stream does not exist, a single piece covering the whole text is used.
+// With doReadFormattingData set, bookmarks, styles, character properties and
+// floating images are loaded as well; failures there are ignored.
+//
+// Returns false if the stream cannot be opened, the header cannot be read or
+// is rejected by readFIB(), or the piece table cannot be read.
+bool OleMainStream::open(bool doReadFormattingData) {
+	if (!OleStream::open()) {
+		return false;
+	}
+
+	static const std::size_t HEADER_SIZE = 768; //size of data in header of main stream
+	char headerBuffer[HEADER_SIZE];
+	seek(0, true);
+	if (read(headerBuffer, HEADER_SIZE) != HEADER_SIZE) {
+		return false;
+	}
+	if (!readFIB(headerBuffer)) {
+		return false;
+	}
+
+	// determining table stream number
+	const bool useSecondTable = (OleUtil::getU2Bytes(headerBuffer, 0xA) & 0x0200) != 0;
+	std::string tableName = useSecondTable ? "1" : "0";
+	tableName += "Table";
+
+	OleEntry tableEntry;
+	if (!myStorage->getEntryByName(tableName, tableEntry)) {
+		// can't find the table stream (that can happen only if the file format is below Word 7/8), so building a simple piece table
+		// TODO: CHECK may be not all old documents have ANSI
+		ZLLogger::Instance().println("DocPlugin", "cant't find table stream, building own simple piece table, that includes all charachters");
+		Piece piece = {myStartOfText, myEndOfText - myStartOfText, true, Piece::PIECE_TEXT, 0};
+		myPieces.push_back(piece);
+		return true;
+	}
+
+	if (!readPieceTable(headerBuffer, tableEntry)) {
+		ZLLogger::Instance().println("DocPlugin", "error during reading piece table");
+		return false;
+	}
+
+	if (doReadFormattingData) {
+		OleEntry dataEntry;
+		if (myStorage->getEntryByName("Data", dataEntry)) {
+			myDataStream = new OleStream(myStorage, dataEntry, myBaseStream);
+		}
+
+		//the results of reading the following structures are not checked: such problems
+		//can be ignored, the document can be shown anyway, maybe with wrong formatting
+		readBookmarks(headerBuffer, tableEntry);
+		readStylesheet(headerBuffer, tableEntry);
+		//readSectionsInfoTable(headerBuffer, tableEntry); //it isn't used now
+		readParagraphStyleTable(headerBuffer, tableEntry);
+		readCharInfoTable(headerBuffer, tableEntry);
+		readFloatingImages(headerBuffer, tableEntry);
+	}
+	return true;
+}
+
+// Read-only access to the lists held by the stream.
+const OleMainStream::Pieces &OleMainStream::getPieces() const { return myPieces; }
+
+const OleMainStream::CharInfoList &OleMainStream::getCharInfoList() const { return myCharInfoList; }
+
+const OleMainStream::StyleInfoList &OleMainStream::getStyleInfoList() const { return myStyleInfoList; }
+
+const OleMainStream::BookmarksList &OleMainStream::getBookmarks() const { return myBookmarks; }
+
+const OleMainStream::InlineImageInfoList &OleMainStream::getInlineImageInfoList() const { return myInlineImageInfoList; }
+
+const OleMainStream::FloatImageInfoList &OleMainStream::getFloatImageInfoList() const { return myFloatImageInfoList; }
+
+// Blocks of the floating image with the given shape id; an empty list when no
+// float image reader is available.
+ZLFileImage::Blocks OleMainStream::getFloatImage(unsigned int shapeId) const {
+	if (!myFLoatImageReader.isNull()) {
+		return myFLoatImageReader->getBlocksForShapeId(shapeId);
+	}
+	return ZLFileImage::Blocks();
+}
+
+// Blocks of the inline image described at dataPosition of the data stream; an
+// empty list when there is no data stream.
+ZLFileImage::Blocks OleMainStream::getInlineImage(unsigned int dataPosition) const {
+	if (!myDataStream.isNull()) {
+		DocInlineImageReader inlineReader(myDataStream);
+		return inlineReader.getImagePieceInfo(dataPosition);
+	}
+	return ZLFileImage::Blocks();
+}
+
+// Evaluates the header of the main stream: logs what its flags and charset
+// field say and stores the start and end of the text (the values at offsets
+// 0x18 and 0x1c) in myStartOfText and myEndOfText.
+//
+// headerBuffer must provide at least 0x20 readable bytes.
+// Returns false if the encryption flag (0x100) is set; in that case the text
+// boundaries are not stored. Returns true otherwise.
+bool OleMainStream::readFIB(const char *headerBuffer) {
+	const int flags = OleUtil::getU2Bytes(headerBuffer, 0xA); //offset for flags
+
+	if (flags & 0x0004) { //flag for complex format
+		ZLLogger::Instance().println("DocPlugin", "This was fast-saved. Some information is lost");
+		//lostInfo = (flags & 0xF0) >> 4);
+	}
+
+	if (flags & 0x1000) { //flag for using extending charset
+		ZLLogger::Instance().println("DocPlugin", "File uses extended character set (get_word8_char)");
+	} else {
+		ZLLogger::Instance().println("DocPlugin", "File uses get_8bit_char character set");
+	}
+
+	if (flags & 0x100) { //flag for encrypted files
+		ZLLogger::Instance().println("DocPlugin", "File is encrypted");
+		// Encryption key = %08lx ; NumUtil::get4Bytes(header, 14)
+		return false;
+	}
+
+	const unsigned int charset = OleUtil::getU2Bytes(headerBuffer, 0x14); //offset for charset number
+	if (charset && charset != 0x100) { //0x100 = default charset
+		//println() is called without a value argument, so a "%d" placeholder could never
+		//be filled in; the decimal number is put into the message text itself
+		//(charset is not 0 here, so at least one digit is produced)
+		std::string charsetNumber;
+		for (unsigned int rest = charset; rest != 0; rest /= 10) {
+			charsetNumber.insert(charsetNumber.begin(), (char)('0' + rest % 10));
+		}
+		ZLLogger::Instance().println("DocPlugin", "Using not default character set " + charsetNumber);
+	} else {
+		ZLLogger::Instance().println("DocPlugin", "Using default character set");
+	}
+
+	myStartOfText = OleUtil::get4Bytes(headerBuffer, 0x18); //offset for start of text value
+	myEndOfText = OleUtil::get4Bytes(headerBuffer, 0x1c); //offset for end of text value
+	return true;
+}
+
+// Distributes the pieces of s over two lists: pieces (or parts of a piece)
+// lying before the character count given by boundary go to dest1 and get
+// type1, everything after it goes to dest2 and gets type2. The piece that
+// straddles the boundary is cut in two; empty halves are dropped.
+// Both destinations are cleared first.
+void OleMainStream::splitPieces(const Pieces &s, Pieces &dest1, Pieces &dest2, Piece::PieceType type1, Piece::PieceType type2, int boundary) {
+	//work on a copy: callers may pass the same container as source and as destination
+	const Pieces source = s;
+	dest1.clear();
+	dest2.clear();
+
+	int sumLength = 0;
+	std::size_t index = 0;
+	bool boundaryReached = false;
+	while (!boundaryReached && index < source.size()) {
+		Piece head = source.at(index);
+		++index;
+
+		if (head.Length + sumLength < boundary) {
+			//the piece lies completely before the boundary
+			sumLength += head.Length;
+			head.Type = type1;
+			dest1.push_back(head);
+			continue;
+		}
+
+		//the boundary falls into (or right at the end of) this piece: cut it in two
+		Piece tail = head;
+
+		head.Length = boundary - sumLength;
+		head.Type = type1;
+
+		tail.Type = type2;
+		tail.Offset += head.Length * 2;
+		tail.Length -= head.Length;
+
+		if (head.Length > 0) {
+			dest1.push_back(head);
+		}
+		if (tail.Length > 0) {
+			dest2.push_back(tail);
+		}
+		boundaryReached = true;
+	}
+
+	//everything after the cut piece belongs to the second list
+	while (index < source.size()) {
+		Piece rest = source.at(index);
+		rest.Type = type2;
+		dest2.push_back(rest);
+		++index;
+	}
+}
+
+// Loads the CLX structure from the table stream and extracts the piece table
+// from it.
+//
+// Offset and length of the CLX are taken from the header (offsets 0x01A2 and
+// 0x01A6). Inside the CLX the piece table is searched for as a 0x02 byte
+// followed by a 4-byte length that equals the number of bytes remaining after
+// it; those remaining bytes are returned.
+//
+// Returns an empty string if the CLX cannot be read or has an invalid format.
+// NOTE(review): when 0x02 candidates exist but none has a matching length, the
+// tail after the last candidate is returned instead of an empty string -
+// confirm whether that leniency is intended.
+std::string OleMainStream::getPiecesTableBuffer(const char *headerBuffer, OleStream &tableStream) {
+	const unsigned int clxOffset = OleUtil::getU4Bytes(headerBuffer, 0x01A2); //offset for CLX structure
+	const unsigned int clxLength = OleUtil::getU4Bytes(headerBuffer, 0x01A6); //offset for value of CLX structure length
+
+	//1 step : loading CLX table from table stream
+	if (!tableStream.seek(clxOffset, true)) {
+		ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- error for seeking to CLX structure");
+		return std::string();
+	}
+	//the string owns the memory, so no error path can leak the buffer
+	std::string clx(clxLength, '\0');
+	if (clxLength != 0 && tableStream.read(&clx[0], clxLength) != clxLength) {
+		ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- CLX structure length is invalid");
+		return std::string();
+	}
+
+	//2 step: searching for pieces table buffer at CLX
+	//(determines it by 0x02 as start symbol)
+	std::size_t from = 0;
+	std::size_t i;
+	std::string pieceTableBuffer;
+	while ((i = clx.find_first_of(0x02, from)) != std::string::npos) {
+		if (clx.size() < i + 1 + 4) {
+			ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- CLX structure has invalid format");
+			return std::string();
+		}
+		const unsigned int pieceTableLength = OleUtil::getU4Bytes(clx.c_str(), i + 1);
+		pieceTableBuffer = std::string(clx, i + 1 + 4);
+		if (pieceTableBuffer.length() != pieceTableLength) {
+			//this 0x02 byte is not the start of the piece table, look for the next one
+			from = i + 1;
+			continue;
+		}
+		break;
+	}
+	return pieceTableBuffer;
+}
+
+
+// Builds myPieces from the piece table stored in the table stream.
+//
+// The piece table consists of an array of character positions (CP) followed
+// by one 8-byte descriptor per piece. Every piece gets its offset, length,
+// ANSI flag, type (text, footnote or other) and start CP.
+//
+// Returns false if the piece table buffer is empty, contains fewer than two
+// character positions, or no descriptor could be read; true otherwise.
+bool OleMainStream::readPieceTable(const char *headerBuffer, const OleEntry &tableEntry) {
+ OleStream tableStream(myStorage, tableEntry, myBaseStream);
+ std::string piecesTableBuffer = getPiecesTableBuffer(headerBuffer, tableStream);
+
+ if (piecesTableBuffer.empty()) {
+ return false;
+ }
+
+ //getting count of Character Positions for different types of subdocuments in Main Stream
+ int ccpText = OleUtil::get4Bytes(headerBuffer, 0x004C); //text
+ int ccpFtn = OleUtil::get4Bytes(headerBuffer, 0x0050); //footnote subdocument
+ int ccpHdd = OleUtil::get4Bytes(headerBuffer, 0x0054); //header subdocument
+ int ccpMcr = OleUtil::get4Bytes(headerBuffer, 0x0058); //macro subdocument
+ int ccpAtn = OleUtil::get4Bytes(headerBuffer, 0x005C); //comment subdocument
+ int ccpEdn = OleUtil::get4Bytes(headerBuffer, 0x0060); //endnote subdocument
+ int ccpTxbx = OleUtil::get4Bytes(headerBuffer, 0x0064); //textbox subdocument
+ int ccpHdrTxbx = OleUtil::get4Bytes(headerBuffer, 0x0068); //textbox subdocument of the header
+ //the last CP is the sum of all counts, plus one if any subdocument besides the text exists
+ int lastCP = ccpFtn + ccpHdd + ccpMcr + ccpAtn + ccpEdn + ccpTxbx + ccpHdrTxbx;
+ if (lastCP != 0) {
+ ++lastCP;
+ }
+ lastCP += ccpText;
+
+ //getting the CP (character positions) and CP descriptors
+ std::vector<int> cp; //array of character positions for pieces
+ //j is the byte offset of the CP being read; after the loop it points to the last CP read
+ unsigned int j = 0;
+ for (j = 0; ; j += 4) {
+ if (piecesTableBuffer.size() < j + 4) {
+ ZLLogger::Instance().println("DocPlugin", "invalid piece table, cp ends not with a lastcp");
+ break;
+ }
+ int curCP = OleUtil::get4Bytes(piecesTableBuffer.c_str(), j);
+ cp.push_back(curCP);
+ if (curCP == lastCP) {
+ break;
+ }
+ }
+
+ if (cp.size() < 2) {
+ ZLLogger::Instance().println("DocPlugin", "invalid piece table, < 2 pieces");
+ return false;
+ }
+
+ //n character positions delimit n - 1 pieces, so n - 1 descriptors are expected
+ std::vector<std::string> descriptors;
+ for (std::size_t k = 0; k < cp.size() - 1; ++k) {
+ //j + 4, because it should be taken after CP in PiecesTable Buffer
+ //k * 8, because it should be taken 8 byte for each descriptor
+ std::size_t substrFrom = j + 4 + k * 8;
+ if (piecesTableBuffer.size() < substrFrom + 8) {
+ ZLLogger::Instance().println("DocPlugin", "invalid piece table, problems with descriptors reading");
+ break;
+ }
+ descriptors.push_back(piecesTableBuffer.substr(substrFrom, 8));
+ }
+
+ //filling the Pieces vector
+ //only pieces that have both boundaries and a descriptor are used
+ std::size_t minValidSize = std::min(cp.size() - 1, descriptors.size());
+ if (minValidSize == 0) {
+ ZLLogger::Instance().println("DocPlugin", "invalid piece table, there are no pieces");
+ return false;
+ }
+
+ for (std::size_t i = 0; i < minValidSize; ++i) {
+ //4byte integer with offset and ANSI flag
+ int fcValue = OleUtil::get4Bytes(descriptors.at(i).c_str(), 0x2); //offset for piece structure
+ Piece piece;
+ piece.IsANSI = (fcValue & 0x40000000) == 0x40000000; //ansi flag
+ piece.Offset = fcValue & 0x3FFFFFFF; //getting offset for current piece
+ //length in characters at this point; Type and startCP are filled in below
+ piece.Length = cp.at(i + 1) - cp.at(i);
+ myPieces.push_back(piece);
+ }
+
+ //split pieces into different types
+ //first the text is cut off, then the rest is divided into footnotes and everything else
+ Pieces piecesText, piecesFootnote, piecesOther;
+ splitPieces(myPieces, piecesText, piecesFootnote, Piece::PIECE_TEXT, Piece::PIECE_FOOTNOTE, ccpText);
+ splitPieces(piecesFootnote, piecesFootnote, piecesOther, Piece::PIECE_FOOTNOTE, Piece::PIECE_OTHER, ccpFtn);
+
+ //rebuilding myPieces in the order text, footnotes, other
+ myPieces.clear();
+ for (std::size_t i = 0; i < piecesText.size(); ++i) {
+ myPieces.push_back(piecesText.at(i));
+ }
+ for (std::size_t i = 0; i < piecesFootnote.size(); ++i) {
+ myPieces.push_back(piecesFootnote.at(i));
+ }
+ for (std::size_t i = 0; i < piecesOther.size(); ++i) {
+ myPieces.push_back(piecesOther.at(i));
+ }
+
+ //converting length and offset depending on isANSI
+ //non-ANSI pieces get their length doubled, ANSI pieces get their offset halved
+ for (std::size_t i = 0; i < myPieces.size(); ++i) {
+ Piece &piece = myPieces.at(i);
+ if (!piece.IsANSI) {
+ piece.Length *= 2;
+ } else {
+ piece.Offset /= 2;
+ }
+ }
+
+ //filling startCP field
+ //startCP counts characters, so the doubled length of non-ANSI pieces is halved again
+ unsigned int curStartCP = 0;
+ for (std::size_t i = 0; i < myPieces.size(); ++i) {
+ Piece &piece = myPieces.at(i);
+ piece.startCP = curStartCP;
+ if (piece.IsANSI) {
+ curStartCP += piece.Length;
+ } else {
+ curStartCP += piece.Length / 2;
+ }
+ }
+ return true;
+}
+
+// Reads the document bookmarks and appends them to myBookmarks.
+//
+// headerBuffer - FIB header; offsets 0x142/0x146 give the address and length of the
+//                bookmark names table (SttbfBkmk), 0x14A/0x14E the address and length
+//                of the table with bookmark start positions (plcfBkmkf)
+// tableEntry   - OLE entry of the table stream both structures are read from
+//
+// Returns true when the tables were read (including the "no bookmarks" case),
+// false when a table could not be read or has an impossible length.
+bool OleMainStream::readBookmarks(const char *headerBuffer, const OleEntry &tableEntry) {
+	//SttbfBkmk structure is a table of bookmark name strings
+	unsigned int beginNamesInfo = OleUtil::getU4Bytes(headerBuffer, 0x142); // address of SttbfBkmk structure
+	std::size_t namesInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x146); // length of SttbfBkmk structure
+
+	if (namesInfoLength == 0) {
+		return true; //there's no bookmarks
+	}
+
+	OleStream tableStream(myStorage, tableEntry, myBaseStream);
+	std::string buffer;
+	if (!readToBuffer(buffer, beginNamesInfo, namesInfoLength, tableStream)) {
+		return false;
+	}
+
+	//count of records is stored at offset 0x2; treat a table too short to hold it as empty
+	unsigned int recordsNumber = 0;
+	if (buffer.size() >= 0x4) {
+		recordsNumber = OleUtil::getU2Bytes(buffer.c_str(), 0x2);
+	}
+
+	std::vector<std::string> names;
+	std::size_t offset = 0x6; //initial offset
+	for (unsigned int i = 0; i < recordsNumber; ++i) {
+		if (buffer.size() < offset + 2) {
+			ZLLogger::Instance().println("DocPlugin", "problmes with reading bookmarks names");
+			break;
+		}
+		const std::size_t length = (std::size_t)OleUtil::getU2Bytes(buffer.c_str(), offset) * 2; //length of string in bytes
+		if (buffer.size() < offset + 2 + length) {
+			//the name runs past the end of the table: keep the names read so far
+			ZLLogger::Instance().println("DocPlugin", "problmes with reading bookmarks names");
+			break;
+		}
+		ZLUnicodeUtil::Ucs2String name;
+		for (std::size_t j = 0; j < length; j += 2) {
+			//bytes must be taken as unsigned: a plain (signed) char >= 0x80 would be
+			//sign-extended and overwrite the high byte of the UCS-2 code unit
+			const unsigned char lowByte = (unsigned char)buffer[offset + 2 + j];
+			const unsigned char highByte = (unsigned char)buffer[offset + 2 + j + 1];
+			ZLUnicodeUtil::Ucs2Char ucs2Char = (unsigned int)lowByte | ((unsigned int)highByte << 8);
+			name.push_back(ucs2Char);
+		}
+		std::string utf8Name;
+		ZLUnicodeUtil::ucs2ToUtf8(utf8Name, name);
+		names.push_back(utf8Name);
+		offset += length + 2;
+	}
+
+	//plcfBkmkf structure is table recording beginning CPs of bookmarks
+	unsigned int beginCharPosInfo = OleUtil::getU4Bytes(headerBuffer, 0x14A); // address of plcfBkmkf structure
+	std::size_t charPosInfoLen = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x14E); // length of plcfBkmkf structure
+
+	if (charPosInfoLen == 0) {
+		return true; //there's no bookmarks
+	}
+	if (charPosInfoLen < 4) {
+		//too short to be a PLC; the element count computation would underflow
+		return false;
+	}
+
+	if (!readToBuffer(buffer, beginCharPosInfo, charPosInfoLen, tableStream)) {
+		return false;
+	}
+
+	static const unsigned int BKF_SIZE = 4;
+	std::size_t size = calcCountOfPLC(charPosInfoLen, BKF_SIZE);
+	std::vector<unsigned int> charPage;
+	for (std::size_t index = 0, cpOffset = 0; index < size; ++index, cpOffset += 4) {
+		charPage.push_back(OleUtil::getU4Bytes(buffer.c_str(), cpOffset));
+	}
+
+	for (std::size_t i = 0; i < names.size(); ++i) {
+		if (i >= charPage.size()) {
+			break; //for the case if something in these structures goes wrong, to not to lose all bookmarks
+		}
+		Bookmark bookmark;
+		bookmark.CharPosition = charPage.at(i);
+		bookmark.Name = names.at(i);
+		myBookmarks.push_back(bookmark);
+	}
+
+	return true;
+}
+
+// Reads the stylesheet (STSH structure) into myStyleSheet.
+//
+// headerBuffer - FIB header; offsets 0xa2/0xa6 give the address and length of STSH
+// tableEntry   - OLE entry of the table stream the STSH is read from
+//
+// Styles based on another style are copied from their base first, so the table is
+// walked repeatedly until a pass fills no new style. Records that do not fit into
+// the STSH buffer are left with default values instead of being read out of bounds.
+//
+// Returns false when the STSH structure cannot be read, true otherwise.
+bool OleMainStream::readStylesheet(const char *headerBuffer, const OleEntry &tableEntry) {
+	//STSH structure is a stylesheet
+	unsigned int beginStshInfo = OleUtil::getU4Bytes(headerBuffer, 0xa2); // address of STSH structure
+	std::size_t stshInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xa6); // length of STSH structure
+
+	//the fields read below (header size, styles count, base size) occupy the first 6 bytes
+	if (stshInfoLength < 6) {
+		ZLLogger::Instance().println("DocPlugin", "problems with reading STSH structure, invalid length");
+		return false;
+	}
+
+	OleStream tableStream(myStorage, tableEntry, myBaseStream);
+	//vector owns the memory, so it is released on every return path
+	std::vector<char> storage(stshInfoLength);
+	char *buffer = &storage[0];
+	if (!tableStream.seek(beginStshInfo, true)) {
+		ZLLogger::Instance().println("DocPlugin", "problems with reading STSH structure");
+		return false;
+	}
+	if (tableStream.read(buffer, stshInfoLength) != stshInfoLength) {
+		ZLLogger::Instance().println("DocPlugin", "problems with reading STSH structure, invalid length");
+		return false;
+	}
+
+	std::size_t stdCount = (std::size_t)OleUtil::getU2Bytes(buffer, 2);
+	std::size_t stdBaseInFile = (std::size_t)OleUtil::getU2Bytes(buffer, 4);
+	myStyleSheet.resize(stdCount);
+
+	std::vector<bool> isFilled;
+	isFilled.resize(stdCount, false);
+
+	std::size_t stdLen = 0;
+	bool styleSheetWasChanged = false;
+	do { //make it in while loop, because some base style can be after their successors
+		styleSheetWasChanged = false;
+		for (std::size_t index = 0, offset = 2 + (std::size_t)OleUtil::getU2Bytes(buffer, 0); index < stdCount; index++, offset += 2 + stdLen) {
+			if (offset + 2 > stshInfoLength) {
+				//the rest of the records lies outside of the STSH buffer
+				break;
+			}
+			stdLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset);
+			if (isFilled.at(index)) {
+				continue;
+			}
+
+			if (stdLen == 0) {
+				//if record is empty, left it default
+				isFilled[index] = true;
+				continue;
+			}
+
+			if (stdLen < 6 || offset + 2 + stdLen > stshInfoLength) {
+				//record is too short for the fields read below or is truncated, left it default
+				isFilled[index] = true;
+				continue;
+			}
+
+			Style styleInfo = myStyleSheet.at(index);
+
+			const unsigned int styleAndBaseType = OleUtil::getU2Bytes(buffer, offset + 4);
+			const unsigned int styleType = styleAndBaseType % 16;
+			const unsigned int baseStyleId = styleAndBaseType / 16;
+			if (baseStyleId == Style::STYLE_NIL || baseStyleId == Style::STYLE_USER) {
+				//if based on nil or user style, left default
+			} else {
+				int baseStyleIndex = getStyleIndex(baseStyleId, isFilled, myStyleSheet);
+				if (baseStyleIndex < 0) {
+					//this base style is not filled yet, so pass it at some time
+					continue;
+				}
+				styleInfo = myStyleSheet.at(baseStyleIndex);
+				styleInfo.StyleIdCurrent = Style::STYLE_INVALID;
+			}
+
+			// parse STD structure
+			unsigned int tmp = OleUtil::getU2Bytes(buffer, offset + 6);
+			unsigned int upxCount = tmp % 16;
+			styleInfo.StyleIdNext = tmp / 16;
+
+			//adding current style
+			myStyleSheet[index] = styleInfo;
+			isFilled[index] = true;
+			styleSheetWasChanged = true;
+
+			std::size_t pos = 2 + stdBaseInFile;
+			if (pos > stdLen) {
+				//the name length field would be outside of this record
+				continue;
+			}
+			std::size_t nameLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos);
+			nameLen = nameLen * 2 + 2; //from Unicode characters to bytes + Unicode null charachter length
+			pos += 2 + nameLen;
+			if (pos % 2 != 0) {
+				++pos;
+			}
+			if (pos >= stdLen) {
+				continue;
+			}
+			std::size_t upxLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos);
+			if (pos + upxLen > stdLen) {
+				//UPX length too large
+				continue;
+			}
+			//for style info styleType must be equal 1
+			if (styleType == 1 && upxCount >= 1) {
+				if (upxLen >= 2) {
+					styleInfo.StyleIdCurrent = OleUtil::getU2Bytes(buffer, offset + pos + 2);
+					getStyleInfo(0, buffer + offset + pos + 4, upxLen - 2, styleInfo);
+					myStyleSheet[index] = styleInfo;
+				}
+				pos += 2 + upxLen;
+				if (pos % 2 != 0) {
+					++pos;
+				}
+				if (pos >= stdLen) {
+					//no room for the next UPX; every such case was rejected by the check below
+					//anyway, but only after reading its length from outside of the record
+					continue;
+				}
+				upxLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos);
+			}
+			if (upxLen == 0 || pos + upxLen > stdLen) {
+				//too small/too large
+				continue;
+			}
+			//for char info styleType can be equal 1 or 2
+			if ((styleType == 1 && upxCount >= 2) || (styleType == 2 && upxCount >= 1)) {
+				CharInfo charInfo;
+				getCharInfo(0, Style::STYLE_INVALID, buffer + offset + pos + 2, upxLen, charInfo);
+				styleInfo.CurrentCharInfo = charInfo;
+				myStyleSheet[index] = styleInfo;
+			}
+		}
+	} while (styleSheetWasChanged);
+	return true;
+}
+
+// Reads character formatting (PlcfbteChpx and the format pages it points to) and
+// appends the results to myCharInfoList and myInlineImageInfoList.
+//
+// headerBuffer - FIB header; offsets 0xfa/0xfe give the address and length of PlcfbteChpx
+// tableEntry   - OLE entry of the table stream the PlcfbteChpx is read from
+//
+// Uses myPieces, myStyleInfoList and myStyleSheet, so those must be filled before.
+// Returns false when the table or one of the format pages cannot be read.
+bool OleMainStream::readCharInfoTable(const char *headerBuffer, const OleEntry &tableEntry) {
+	//PlcfbteChpx structure is table with formatting for particular run of text
+	unsigned int beginCharInfo = OleUtil::getU4Bytes(headerBuffer, 0xfa); // address of PlcfbteChpx structure
+	std::size_t charInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xfe); // length of PlcfbteChpx structure
+	if (charInfoLength < 4) {
+		return false;
+	}
+
+	OleStream tableStream(myStorage, tableEntry, myBaseStream);
+	std::string buffer;
+	if (!readToBuffer(buffer, beginCharInfo, charInfoLength, tableStream)) {
+		return false;
+	}
+
+	static const unsigned int CHPX_SIZE = 4;
+	std::size_t size = calcCountOfPLC(charInfoLength, CHPX_SIZE);
+	std::vector<unsigned int> charBlocks;
+	for (std::size_t index = 0, offset = (size + 1) * 4; index < size; ++index, offset += CHPX_SIZE) {
+		charBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), offset));
+	}
+
+	//vector owns the page memory, so it is released on every return path
+	const std::size_t pageSize = OleStorage::BBD_BLOCK_SIZE;
+	std::vector<char> formatPage(pageSize);
+	char *formatPageBuffer = &formatPage[0];
+	for (std::size_t index = 0; index < charBlocks.size(); ++index) {
+		seek(charBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
+		if (read(formatPageBuffer, pageSize) != pageSize) {
+			return false;
+		}
+		unsigned int crun = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with crun (count of 'run of text')
+		//the page starts with (crun + 1) 4-byte offsets followed by crun 1-byte chpx offsets;
+		//all of them have to fit in front of the crun byte itself
+		if ((crun + 1) * 4 + crun > 0x1ff) {
+			ZLLogger::Instance().println("DocPlugin", "invalid character format page, too many runs");
+			continue;
+		}
+		for (unsigned int index2 = 0; index2 < crun; ++index2) {
+			unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4);
+			unsigned int chpxOffset = 2 * OleUtil::getU1Byte(formatPageBuffer, (crun + 1) * 4 + index2);
+			unsigned int len = OleUtil::getU1Byte(formatPageBuffer, chpxOffset);
+			unsigned int charPos = 0;
+			if (!offsetToCharPos(offset, charPos, myPieces)) {
+				continue;
+			}
+			unsigned int styleId = getStyleIdByCharPos(charPos, myStyleInfoList);
+
+			//count of bytes handed to the property parsers: 'len - 1' as before, but
+			//without the unsigned underflow for len == 0 and never past the end of the
+			//page (the data starts at chpxOffset + 1, chpxOffset is at most 510)
+			unsigned int grpprlBytes = (len > 0) ? len - 1 : 0;
+			const unsigned int availableBytes = (unsigned int)pageSize - (chpxOffset + 1);
+			if (grpprlBytes > availableBytes) {
+				grpprlBytes = availableBytes;
+			}
+
+			CharInfo charInfo = getStyleFromStylesheet(styleId, myStyleSheet).CurrentCharInfo;
+			if (chpxOffset != 0) {
+				getCharInfo(chpxOffset, styleId, formatPageBuffer + 1, grpprlBytes, charInfo);
+			}
+			myCharInfoList.push_back(CharPosToCharInfo(charPos, charInfo));
+
+			if (chpxOffset != 0) {
+				InlineImageInfo pictureInfo;
+				if (getInlineImageInfo(chpxOffset, formatPageBuffer + 1, grpprlBytes, pictureInfo)) {
+					myInlineImageInfoList.push_back(CharPosToInlineImageInfo(charPos, pictureInfo));
+				}
+			}
+
+		}
+	}
+	return true;
+}
+
+// Collects the floating images of the document.
+//
+// The Plcspa table (address/length at header offsets 0x01DA/0x01DE) gives one
+// (character position, shape id) pair per image; these go to myFloatImageInfoList.
+// When office art data is present (address/length at 0x22A/0x022E) a
+// DocFloatImageReader is created over fresh table and main streams and asked to
+// read everything.
+//
+// Returns false when a present table is shorter than 4 bytes or cannot be read.
+bool OleMainStream::readFloatingImages(const char *headerBuffer, const OleEntry &tableEntry) {
+	const unsigned int plcspaOffset = OleUtil::getU4Bytes(headerBuffer, 0x01DA);
+	if (plcspaOffset == 0) {
+		//no Plcspa, so there are no office art objects
+		return true;
+	}
+	const unsigned int plcspaLength = OleUtil::getU4Bytes(headerBuffer, 0x01DE);
+	if (plcspaLength < 4) {
+		return false;
+	}
+
+	OleStream tableStream(myStorage, tableEntry, myBaseStream);
+	std::string plcspa;
+	if (!readToBuffer(plcspa, plcspaOffset, plcspaLength, tableStream)) {
+		return false;
+	}
+
+	static const unsigned int SPA_SIZE = 26;
+	const std::size_t shapesCount = calcCountOfPLC(plcspaLength, SPA_SIZE);
+	const char *table = plcspa.c_str();
+	//the table starts with (shapesCount + 1) character positions, the records follow
+	const std::size_t recordsStart = (shapesCount + 1) * 4;
+	for (std::size_t shape = 0; shape < shapesCount; ++shape) {
+		FloatImageInfo info;
+		//shape id is the first 4 bytes of the record
+		info.ShapeId = OleUtil::getU4Bytes(table, recordsStart + shape * SPA_SIZE);
+		const unsigned int charPos = OleUtil::getU4Bytes(table, shape * 4);
+		myFloatImageInfoList.push_back(CharPosToFloatImageInfo(charPos, info));
+	}
+
+	//DggInfo structure holds the office art object table data
+	const unsigned int dggInfoOffset = OleUtil::getU4Bytes(headerBuffer, 0x22A);
+	if (dggInfoOffset == 0) {
+		return true;
+	}
+	const unsigned int dggInfoLength = OleUtil::getU4Bytes(headerBuffer, 0x022E);
+	if (dggInfoLength < 4) {
+		return false;
+	}
+
+	shared_ptr<OleStream> artTableStream = new OleStream(myStorage, tableEntry, myBaseStream);
+	shared_ptr<OleStream> artMainStream = new OleStream(myStorage, myOleEntry, myBaseStream);
+	if (!artTableStream->open() || !artMainStream->open()) {
+		//images stay unavailable, the rest of the document is still usable
+		return true;
+	}
+	myFLoatImageReader = new DocFloatImageReader(dggInfoOffset, dggInfoLength, artTableStream, artMainStream);
+	myFLoatImageReader->readAll();
+	return true;
+}
+
+// Reads paragraph formatting (PlcBtePapx and the format pages it points to) and
+// appends one (character position, style) pair per paragraph to myStyleInfoList.
+//
+// headerBuffer - FIB header; offsets 0x102/0x106 give the address and length of PlcBtePapx
+// tableEntry   - OLE entry of the table stream the PlcBtePapx is read from
+//
+// Uses myPieces and myStyleSheet, so those must be filled before.
+// Returns false when the table or one of the format pages cannot be read.
+bool OleMainStream::readParagraphStyleTable(const char *headerBuffer, const OleEntry &tableEntry) {
+	//PlcBtePapx structure is table with formatting for all paragraphs
+	unsigned int beginParagraphInfo = OleUtil::getU4Bytes(headerBuffer, 0x102); // address of PlcBtePapx structure
+	std::size_t paragraphInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x106); // length of PlcBtePapx structure
+	if (paragraphInfoLength < 4) {
+		return false;
+	}
+
+	OleStream tableStream(myStorage, tableEntry, myBaseStream);
+	std::string buffer;
+	if (!readToBuffer(buffer, beginParagraphInfo, paragraphInfoLength, tableStream)) {
+		return false;
+	}
+
+	static const unsigned int PAPX_SIZE = 4;
+	std::size_t size = calcCountOfPLC(paragraphInfoLength, PAPX_SIZE);
+
+	std::vector<unsigned int> paragraphBlocks;
+	for (std::size_t index = 0, tOffset = (size + 1) * 4; index < size; ++index, tOffset += PAPX_SIZE) {
+		paragraphBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), tOffset));
+	}
+
+	//vector owns the page memory, so it is released on every return path
+	const std::size_t pageSize = OleStorage::BBD_BLOCK_SIZE;
+	std::vector<char> formatPage(pageSize);
+	char *formatPageBuffer = &formatPage[0];
+	for (std::size_t index = 0; index < paragraphBlocks.size(); ++index) {
+		seek(paragraphBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true);
+		if (read(formatPageBuffer, pageSize) != pageSize) {
+			return false;
+		}
+		const unsigned int paragraphsCount = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with 'cpara' value (count of paragraphs)
+		//the page starts with (cpara + 1) 4-byte offsets followed by cpara 13-byte entries;
+		//all of them have to fit in front of the cpara byte itself
+		if ((paragraphsCount + 1) * 4 + paragraphsCount * 13 > 0x1ff) {
+			ZLLogger::Instance().println("DocPlugin", "invalid paragraph format page, too many paragraphs");
+			continue;
+		}
+		for (unsigned int index2 = 0; index2 < paragraphsCount; ++index2) {
+			const unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4);
+			unsigned int papxOffset = OleUtil::getU1Byte(formatPageBuffer, (paragraphsCount + 1) * 4 + index2 * 13) * 2;
+			if (papxOffset == 0) {
+				continue;
+			}
+			unsigned int len = OleUtil::getU1Byte(formatPageBuffer, papxOffset) * 2;
+			if (len == 0) {
+				//zero length byte means that the length is stored in the following byte
+				++papxOffset;
+				len = OleUtil::getU1Byte(formatPageBuffer, papxOffset) * 2;
+			}
+
+			if (papxOffset + 3 > pageSize) {
+				//the 2-byte style id would be read from outside of the page
+				continue;
+			}
+			const unsigned int styleId = OleUtil::getU2Bytes(formatPageBuffer, papxOffset + 1);
+			Style styleInfo = getStyleFromStylesheet(styleId, myStyleSheet);
+
+			if (len >= 3) {
+				//properties start at papxOffset + 3; do not let them run past the page
+				unsigned int grpprlBytes = len - 3;
+				const unsigned int availableBytes = (unsigned int)pageSize - (papxOffset + 3);
+				if (grpprlBytes > availableBytes) {
+					grpprlBytes = availableBytes;
+				}
+				getStyleInfo(papxOffset, formatPageBuffer + 3, grpprlBytes, styleInfo);
+			}
+
+			unsigned int charPos = 0;
+			if (!offsetToCharPos(offset, charPos, myPieces)) {
+				continue;
+			}
+			myStyleInfoList.push_back(CharPosToStyle(charPos, styleInfo));
+		}
+	}
+	return true;
+}
+
+// Reads the section table (PlcfSed) and appends one SectionInfo per section to
+// mySectionInfoList.
+//
+// headerBuffer - FIB header; 0x18 holds the address of the text in the main stream,
+//                0xca/0xce the address and length of PlcfSed
+// tableEntry   - OLE entry of the table stream the PlcfSed is read from
+//
+// A section whose properties cannot be read from the main stream is skipped.
+// Returns false when PlcfSed is shorter than 4 bytes or cannot be read.
+bool OleMainStream::readSectionsInfoTable(const char *headerBuffer, const OleEntry &tableEntry) {
+	const unsigned int textStart = OleUtil::getU4Bytes(headerBuffer, 0x18);
+	const unsigned int plcfSedOffset = OleUtil::getU4Bytes(headerBuffer, 0xca);
+	const std::size_t plcfSedLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xce);
+	if (plcfSedLength < 4) {
+		return false;
+	}
+
+	OleStream tableStream(myStorage, tableEntry, myBaseStream);
+	std::string plcfSed;
+	if (!readToBuffer(plcfSed, plcfSedOffset, plcfSedLength, tableStream)) {
+		return false;
+	}
+
+	static const unsigned int SED_SIZE = 12;
+	const std::size_t sectionsCount = calcCountOfPLC(plcfSedLength, SED_SIZE);
+	const char *table = plcfSed.c_str();
+	//the table starts with (sectionsCount + 1) character positions, the descriptors follow
+	const std::size_t descriptorsStart = (sectionsCount + 1) * 4;
+
+	for (std::size_t section = 0; section < sectionsCount; ++section) {
+		//section start, as the text address plus the position stored in the table
+		const unsigned int sectionCharPos = textStart + OleUtil::getU4Bytes(table, section * 4);
+		//address of the section properties, stored 2 bytes into the descriptor
+		const unsigned int sepxOffset = OleUtil::getU4Bytes(table, descriptorsStart + section * SED_SIZE + 2);
+
+		if (sepxOffset != 0xffffffffUL) {
+			//the properties are prefixed with their 2-byte length
+			char lengthField[2];
+			if (!seek(sepxOffset, true) || read(lengthField, 2) != 2) {
+				continue;
+			}
+			const std::size_t totalBytes = 2 + (std::size_t)OleUtil::getU2Bytes(lengthField, 0);
+
+			//go back and read the length field together with the properties
+			if (!seek(sepxOffset, true)) {
+				continue;
+			}
+			char *sepx = new char[totalBytes];
+			if (read(sepx, totalBytes) == totalBytes) {
+				SectionInfo sectionInfo;
+				sectionInfo.CharPosition = sectionCharPos;
+				getSectionInfo(sepx + 2, totalBytes - 2, sectionInfo);
+				mySectionInfoList.push_back(sectionInfo);
+			}
+			delete[] sepx;
+		} else {
+			//invalid record, the section gets default properties
+			SectionInfo sectionInfo;
+			sectionInfo.CharPosition = sectionCharPos;
+			mySectionInfoList.push_back(sectionInfo);
+		}
+	}
+	return true;
+}
+
+//interprets the low 16 bits of the value as a two's complement signed number
+static int oleSigned16(unsigned int value) {
+	value &= 0xFFFF;
+	return (value & 0x8000) != 0 ? (int)value - 0x10000 : (int)value;
+}
+
+// Applies the paragraph property modifiers found in a grpprl to styleInfo.
+//
+// papxOffset   - offset of the grpprl inside grpprlBuffer
+// grpprlBuffer - buffer with the property modifiers
+// bytes        - count of grpprl bytes to process
+// styleInfo    - style to update; fields without a modifier keep their value
+//
+// Indent operands (0x4610, 0x840e, 0x840f, 0x8411) are signed 16-bit values, so they
+// are converted with oleSigned16; read as unsigned, a negative indent (e.g. hanging
+// first line) turned into a large positive one and the clamp in 0x4610 never fired.
+void OleMainStream::getStyleInfo(unsigned int papxOffset, const char *grpprlBuffer, unsigned int bytes, Style &styleInfo) {
+	int tmp, toDelete, toAdd;
+	unsigned int offset = 0;
+	while (bytes >= offset + 2) {
+		unsigned int curPrlLength = 0;
+		switch (OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset)) {
+			case 0x2403: //alignment
+				styleInfo.Alignment = (Style::AlignmentType)OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 2);
+				break;
+			case 0x4610: //left indent, relative to the current one
+				styleInfo.LeftIndent += oleSigned16(OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2));
+				if (styleInfo.LeftIndent < 0) {
+					styleInfo.LeftIndent = 0;
+				}
+				break;
+			case 0xc60d: // ChgTabsPapx
+			case 0xc615: // ChgTabs
+				//nothing is taken from the tabs, the operand is only validated; when the
+				//counts do not fit into the declared size, step over a single byte
+				tmp = OleUtil::get1Byte(grpprlBuffer, papxOffset + offset + 2);
+				if (tmp < 2) {
+					curPrlLength = 1;
+					break;
+				}
+				toDelete = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 3);
+				if (tmp < 2 + 2 * toDelete) {
+					curPrlLength = 1;
+					break;
+				}
+				toAdd = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 4 + 2 * toDelete);
+				if (tmp < 2 + 2 * toDelete + 2 * toAdd) {
+					curPrlLength = 1;
+					break;
+				}
+				break;
+			case 0x840e: //right indent
+				styleInfo.RightIndent = oleSigned16(OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2));
+				break;
+			case 0x840f: //left indent
+				styleInfo.LeftIndent = oleSigned16(OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2));
+				break;
+			case 0x8411: //first line indent
+				styleInfo.FirstLineIndent = oleSigned16(OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2));
+				break;
+			case 0xa413: //space before paragraph
+				styleInfo.BeforeParagraphIndent = OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
+				break;
+			case 0xa414: //space after paragraph
+				styleInfo.AfterParagraphIndent = OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2);
+				break;
+			case 0x2407: //page break before paragraph
+				styleInfo.HasPageBreakBefore = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 2) == 0x01;
+				break;
+			default:
+				break;
+		}
+		if (curPrlLength == 0) {
+			curPrlLength = getPrlLength(grpprlBuffer, papxOffset + offset);
+		}
+		offset += curPrlLength;
+	}
+
+}
+
+// Applies the character property modifiers found in a grpprl to charInfo.
+//
+// chpxOffset   - offset of the grpprl inside grpprlBuffer
+// grpprlBuffer - buffer with the property modifiers
+// bytes        - count of grpprl bytes to process
+// charInfo     - character info to update
+//
+// Only bold (0x0835), italic (0x0836) and font size (0x4a43) are interpreted, all
+// other modifiers are stepped over. The style id parameter is not used.
+void OleMainStream::getCharInfo(unsigned int chpxOffset, unsigned int /*styleId*/, const char *grpprlBuffer, unsigned int bytes, CharInfo &charInfo) {
+	for (unsigned int position = 0; bytes >= position + 2; position += getPrlLength(grpprlBuffer, chpxOffset + position)) {
+		const unsigned int prlStart = chpxOffset + position;
+		const unsigned int opCode = OleUtil::getU2Bytes(grpprlBuffer, prlStart);
+
+		if (opCode == 0x4a43) { //size of font
+			charInfo.FontSize = OleUtil::getU2Bytes(grpprlBuffer, prlStart + 2);
+			continue;
+		}
+
+		//bold and italic are both toggles and differ only in the affected flag
+		unsigned int fontFlag = 0;
+		if (opCode == 0x0835) {
+			fontFlag = CharInfo::FONT_BOLD;
+		} else if (opCode == 0x0836) {
+			fontFlag = CharInfo::FONT_ITALIC;
+		} else {
+			continue;
+		}
+
+		const unsigned int toggle = OleUtil::getU1Byte(grpprlBuffer, prlStart + 2);
+		if (toggle == UNSET) {
+			charInfo.FontStyle &= ~fontFlag;
+		} else if (toggle == SET) {
+			charInfo.FontStyle |= fontFlag;
+		} else if (toggle == NEGATION) {
+			charInfo.FontStyle ^= fontFlag;
+		}
+		//UNCHANGED and unknown values leave the flag as it is
+	}
+
+}
+
+// Applies the section property modifiers found in a grpprl to sectionInfo.
+//
+// grpprlBuffer - buffer starting with the first property modifier
+// bytes        - count of bytes to process
+// sectionInfo  - section info to update
+//
+// Only modifier 0x3009 is interpreted: IsNewPage is set for every operand value
+// except 0 and 1.
+void OleMainStream::getSectionInfo(const char *grpprlBuffer, std::size_t bytes, SectionInfo &sectionInfo) {
+	for (std::size_t position = 0; bytes >= position + 2; position += getPrlLength(grpprlBuffer, position)) {
+		if (OleUtil::getU2Bytes(grpprlBuffer, position) != 0x3009) {
+			continue;
+		}
+		const unsigned int breakKind = OleUtil::getU1Byte(grpprlBuffer, position + 2);
+		sectionInfo.IsNewPage = !(breakKind == 0 || breakKind == 1);
+	}
+}
+
+// Looks for the location of an inline picture among character property modifiers.
+//
+// chpxOffset   - offset of the grpprl inside grpprlBuffer
+// grpprlBuffer - buffer with the property modifiers
+// bytes        - count of grpprl bytes to process
+// pictureInfo  - receives the picture data position when one is found
+//
+// Returns true when a picture location (0x6a03) is present and the run is neither
+// marked as an OLE object (0x080a) nor as binary data (0x0806).
+bool OleMainStream::getInlineImageInfo(unsigned int chpxOffset, const char *grpprlBuffer, unsigned int bytes, InlineImageInfo &pictureInfo) {
+	//p. 105 of [MS-DOC] documentation
+	unsigned int offset = 0;
+	bool isFound = false;
+	while (bytes >= offset + 2) {
+		switch (OleUtil::getU2Bytes(grpprlBuffer, chpxOffset + offset)) {
+			case 0x080a: // ole object, p.107 [MS-DOC]
+				if (OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2) == 0x01) {
+					return false;
+				}
+				break;
+			case 0x0806: // is not a picture, but a binary data? (sprmCFData, p.106 [MS-DOC])
+				//the operand of this modifier is a single byte (getPrlLength gives 3 for it);
+				//comparing 4 bytes made the result depend on the bytes of the next modifier
+				if (OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2) == 0x01) {
+					return false;
+				}
+				break;
+//			case 0x0855: // sprmCFSpec, p.117 [MS-DOC], MUST BE applied with a value of 1 (see p.105 [MS-DOC])
+//				if (OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2) != 0x01) {
+//					return false;
+//				}
+//				break;
+			case 0x6a03: // location p.105 [MS-DOC]
+				pictureInfo.DataPosition = OleUtil::getU4Bytes(grpprlBuffer, chpxOffset + offset + 2);
+				isFound = true;
+				break;
+			default:
+				break;
+		}
+		offset += getPrlLength(grpprlBuffer, chpxOffset + offset);
+	}
+	return isFound;
+}
+
+// Returns the first style of the stylesheet whose StyleIdCurrent equals styleId.
+// For the invalid, nil and user ids, or when nothing matches, a default
+// constructed style carrying the requested id is returned instead.
+OleMainStream::Style OleMainStream::getStyleFromStylesheet(unsigned int styleId, const StyleSheet &stylesheet) {
+	//TODO optimize it: StyleSheet can be map structure with styleId key
+	const bool isSpecialId =
+		styleId == Style::STYLE_INVALID ||
+		styleId == Style::STYLE_NIL ||
+		styleId == Style::STYLE_USER;
+	if (!isSpecialId) {
+		for (StyleSheet::const_iterator it = stylesheet.begin(); it != stylesheet.end(); ++it) {
+			if (it->StyleIdCurrent == styleId) {
+				return *it;
+			}
+		}
+	}
+	Style fallback;
+	fallback.StyleIdCurrent = styleId;
+	return fallback;
+}
+
+// Returns the index of the first already filled stylesheet entry whose
+// StyleIdCurrent equals styleId, or -1 when there is none (always -1 for the
+// invalid style id). isFilled holds one flag per stylesheet entry.
+int OleMainStream::getStyleIndex(unsigned int styleId, const std::vector<bool> &isFilled, const StyleSheet &stylesheet) {
+	//TODO optimize it: StyleSheet can be map structure with styleId key
+	//in that case, this method will be excess
+	int found = -1;
+	if (styleId != Style::STYLE_INVALID) {
+		const int total = (int)stylesheet.size();
+		for (int candidate = 0; candidate < total && found < 0; ++candidate) {
+			if (isFilled.at(candidate) && stylesheet.at(candidate).StyleIdCurrent == styleId) {
+				found = candidate;
+			}
+		}
+	}
+	return found;
+}
+
+// Returns the id of the style covering the given character position.
+// Entry i covers positions from its own start up to (not including) the start of
+// entry i + 1; when no such range matches, the id of the last entry is returned.
+// For an empty list the result is Style::STYLE_INVALID.
+unsigned int OleMainStream::getStyleIdByCharPos(unsigned int charPos, const StyleInfoList &styleInfoList) {
+	const std::size_t total = styleInfoList.size();
+	if (total == 0) {
+		return Style::STYLE_INVALID;
+	}
+	for (std::size_t current = 0; current + 1 < total; ++current) {
+		const unsigned int rangeStart = styleInfoList.at(current).first;
+		const unsigned int rangeEnd = styleInfoList.at(current + 1).first;
+		if (rangeStart <= charPos && charPos < rangeEnd) {
+			return styleInfoList.at(current).second.StyleIdCurrent;
+		}
+	}
+	return styleInfoList.at(total - 1).second.StyleIdCurrent;
+}
+
+// Converts an offset in the main stream to a character position using the pieces.
+//
+// offset  - offset to convert
+// charPos - receives the character position on success
+// pieces  - pieces of the document
+//
+// Returns false when there are no pieces or the offset lies at or behind the end
+// of the last piece. An offset in front of the first piece maps to position 0.
+bool OleMainStream::offsetToCharPos(unsigned int offset, unsigned int &charPos, const Pieces &pieces) {
+	if (pieces.empty()) {
+		return false;
+	}
+	const Piece &firstPiece = pieces.front();
+	if ((unsigned int)firstPiece.Offset > offset) {
+		charPos = 0;
+		return true;
+	}
+	const Piece &lastPiece = pieces.back();
+	if ((unsigned int)(lastPiece.Offset + lastPiece.Length) <= offset) {
+		return false;
+	}
+
+	//take the first piece whose start and the start of its successor enclose
+	//the offset, the last piece otherwise
+	std::size_t owner = pieces.size() - 1;
+	for (std::size_t candidate = 0; candidate + 1 < pieces.size(); ++candidate) {
+		const unsigned int lowerBound = pieces.at(candidate).Offset;
+		const unsigned int upperBound = pieces.at(candidate + 1).Offset;
+		if (lowerBound <= offset && offset < upperBound) {
+			owner = candidate;
+			break;
+		}
+	}
+
+	const Piece &piece = pieces.at(owner);
+	const unsigned int distance = offset - piece.Offset;
+	//non-ANSI pieces use two bytes per character
+	charPos = piece.startCP + (piece.IsANSI ? distance : distance / 2);
+	return true;
+}
+
+// Reads 'length' bytes starting at the absolute 'offset' of 'stream' into 'result'.
+//
+// Returns true on success. On failure (the stream cannot be positioned or gives
+// fewer bytes than requested) false is returned and 'result' is left untouched.
+// The temporary buffer is owned by a vector, so nothing leaks on failure.
+bool OleMainStream::readToBuffer(std::string &result, unsigned int offset, std::size_t length, OleStream &stream) {
+	if (!stream.seek(offset, true)) {
+		//reading from the old position would give unrelated data
+		return false;
+	}
+	if (length == 0) {
+		result.clear();
+		return true;
+	}
+	std::vector<char> buffer(length);
+	if (stream.read(&buffer[0], length) != length) {
+		return false;
+	}
+	result.assign(&buffer[0], length);
+	return true;
+}
+
+// Calculates count of elements in PLC structure, formula from p.30 [MS-DOC]:
+// a PLC with n elements takes (n + 1) * 4 + n * elementSize bytes.
+//
+// totalSize   - size of the whole PLC in bytes
+// elementSize - size of one data element in bytes
+//
+// Returns 0 for a structure shorter than 4 bytes; without this check the unsigned
+// subtraction wraps around and gives a huge count.
+unsigned int OleMainStream::calcCountOfPLC(unsigned int totalSize, unsigned int elementSize) {
+	if (totalSize < 4) {
+		return 0;
+	}
+	return (totalSize - 4) / (4 + elementSize);
+}
+
+// Returns the full length in bytes (2 bytes of operation code plus the operand)
+// of the property modifier that starts at byteNumber in grpprlBuffer.
+// The three highest bits of the operation code select the operand size.
+unsigned int OleMainStream::getPrlLength(const char *grpprlBuffer, unsigned int byteNumber) {
+	const unsigned int opCode = OleUtil::getU2Bytes(grpprlBuffer, byteNumber);
+	const unsigned int sizeBits = opCode & 0xe000;
+
+	if (sizeBits == 0x0000 || sizeBits == 0x2000) {
+		return 3;
+	}
+	if (sizeBits == 0x4000 || sizeBits == 0x8000 || sizeBits == 0xA000) {
+		return 4;
+	}
+	if (sizeBits == 0xE000) {
+		return 5;
+	}
+	if (sizeBits == 0x6000) {
+		return 6;
+	}
+	if (sizeBits != 0xC000) {
+		return 1;
+	}
+
+	//operand of variable size, its length is stored in the first operand byte
+	unsigned int operandLength = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 2);
+	if (opCode == 0xc615 && operandLength == 255) {
+		//for this modifier 255 means that the length has to be calculated from
+		//the counts of deleted and added entries
+		const unsigned int deleted = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 3);
+		const unsigned int added = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 4 + deleted * 4);
+		operandLength = 2 + deleted * 4 + added * 3;
+	}
+	return 3 + operandLength;
+}
diff --git a/fbreader/src/formats/doc/OleMainStream.h b/fbreader/src/formats/doc/OleMainStream.h
new file mode 100644
index 0000000..378f037
--- /dev/null
+++ b/fbreader/src/formats/doc/OleMainStream.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OLEMAINSTREAM_H__
+#define __OLEMAINSTREAM_H__
+
+#include <vector>
+#include <string>
+
+#include "OleStream.h"
+#include "DocFloatImageReader.h"
+
+// Main stream of a Word document: an OleStream that additionally parses the
+// document header and formatting tables and keeps the results (text pieces,
+// stylesheet, character/paragraph/section formatting, bookmarks, image
+// locations) available through the getters below.
+class OleMainStream : public OleStream {
+
+public:
+ // A continuous run of document text inside the stream.
+ struct Piece {
+ enum PieceType {
+ PIECE_TEXT,
+ PIECE_FOOTNOTE,
+ PIECE_OTHER
+ };
+
+ // Offset and Length are kept in bytes: readPieceTable halves the offset of
+ // ANSI pieces and doubles the length of non-ANSI ones.
+ int Offset; // TODO: maybe make it unsigned int
+ int Length; // TODO: maybe make it unsigned int
+ bool IsANSI;
+ PieceType Type;
+ unsigned int startCP; // character position of the first character of the piece
+ };
+ typedef std::vector<Piece> Pieces;
+
+ // Character formatting of a run of text.
+ struct CharInfo {
+ // Bit flags combined in FontStyle.
+ enum Font {
+ FONT_REGULAR = 0,
+ FONT_BOLD = 1 << 0,
+ FONT_ITALIC = 1 << 1,
+ FONT_UNDERLINE = 1 << 2,
+ FONT_CAPITALS = 1 << 3,
+ FONT_SMALL_CAPS = 1 << 4,
+ FONT_STRIKE = 1 << 5,
+ FONT_HIDDEN = 1 << 6,
+ FONT_MARKDEL = 1 << 7,
+ FONT_SUPERSCRIPT = 1 << 8,
+ FONT_SUBSCRIPT = 1 << 9
+ };
+
+ unsigned int FontStyle; // combination of Font flags
+ unsigned int FontSize; // operand of the font size modifier (0x4a43), stored as read
+
+ CharInfo();
+ };
+ // (character position, formatting that starts there)
+ typedef std::pair<unsigned int, CharInfo> CharPosToCharInfo;
+ typedef std::vector<CharPosToCharInfo > CharInfoList;
+
+ // Paragraph style, either an entry of the stylesheet or the formatting of one paragraph.
+ struct Style {
+ enum AlignmentType {
+ ALIGNMENT_LEFT = 0x00,
+ ALIGNMENT_CENTER = 0x01,
+ ALIGNMENT_RIGHT = 0x02,
+ ALIGNMENT_JUSTIFY = 0x03,
+ ALIGNMENT_DEFAULT // used when the document does not set an alignment
+ };
+
+ // style Ids:
+ // (this is not full list of possible style ids, enum is used for using in switch-case)
+ enum StyleID {
+ STYLE_H1 = 0x1,
+ STYLE_H2 = 0x2,
+ STYLE_H3 = 0x3,
+ STYLE_USER = 0xFFE,
+ STYLE_NIL = 0xFFF,
+ STYLE_INVALID = 0xFFFF
+ };
+
+ unsigned int StyleIdCurrent;
+ unsigned int StyleIdNext; // Next style unless overruled
+
+ bool HasPageBreakBefore;
+ unsigned int BeforeParagraphIndent; // Vertical indent before paragraph, pixels
+ unsigned int AfterParagraphIndent; // Vertical indent after paragraph, pixels
+ int LeftIndent;
+ int FirstLineIndent;
+ int RightIndent;
+ AlignmentType Alignment;
+ CharInfo CurrentCharInfo; // character formatting belonging to the style
+
+ Style();
+ };
+
+ // (character position, paragraph style that starts there)
+ typedef std::pair<unsigned int, Style> CharPosToStyle;
+ typedef std::vector<CharPosToStyle> StyleInfoList;
+ typedef std::vector<Style> StyleSheet;
+
+ // Properties of a document section.
+ struct SectionInfo {
+ unsigned int CharPosition;
+ bool IsNewPage;
+
+ SectionInfo();
+ };
+ typedef std::vector<SectionInfo> SectionInfoList;
+
+ // Named bookmark; Name is UTF-8 (converted from UCS-2 in readBookmarks).
+ struct Bookmark {
+ unsigned int CharPosition;
+ std::string Name;
+ };
+ typedef std::vector<Bookmark> BookmarksList;
+
+ // Inline picture; DataPosition is the operand of the picture location modifier (0x6a03).
+ struct InlineImageInfo {
+ unsigned int DataPosition;
+
+ InlineImageInfo();
+ };
+ typedef std::pair<unsigned int, InlineImageInfo> CharPosToInlineImageInfo;
+ typedef std::vector<CharPosToInlineImageInfo> InlineImageInfoList;
+
+ // Floating picture, identified by the shape id read from the Plcspa table.
+ struct FloatImageInfo {
+ unsigned int ShapeId;
+ FloatImageInfo();
+ };
+ typedef std::pair<unsigned int, FloatImageInfo> CharPosToFloatImageInfo;
+ typedef std::vector<CharPosToFloatImageInfo> FloatImageInfoList;
+
+ enum ImageType { //see p. 60 [MS-ODRAW]
+ IMAGE_EMF = 0xF01A,
+ IMAGE_WMF = 0xF01B,
+ IMAGE_PICT = 0xF01C,
+ IMAGE_JPEG = 0xF01D,
+ IMAGE_PNG = 0xF01E,
+ IMAGE_DIB = 0xF01F,
+ IMAGE_TIFF = 0xF029,
+ IMAGE_JPEG2 = 0xF02A
+ };
+
+public:
+ OleMainStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream);
+
+public:
+ bool open(bool doReadFormattingData);
+ const Pieces &getPieces() const;
+ const CharInfoList &getCharInfoList() const;
+ const StyleInfoList &getStyleInfoList() const;
+ const BookmarksList &getBookmarks() const;
+ const InlineImageInfoList &getInlineImageInfoList() const;
+ const FloatImageInfoList &getFloatImageInfoList() const;
+
+ ZLFileImage::Blocks getFloatImage(unsigned int shapeId) const;
+ ZLFileImage::Blocks getInlineImage(unsigned int dataPos) const;
+
+private:
+ // Readers of the individual document structures. headerBuffer is the document
+ // header holding the addresses and lengths of the structures, tableEntry the
+ // OLE entry of the table stream they are stored in.
+ bool readFIB(const char *headerBuffer);
+ bool readPieceTable(const char *headerBuffer, const OleEntry &tableEntry);
+ bool readBookmarks(const char *headerBuffer, const OleEntry &tableEntry);
+ bool readStylesheet(const char *headerBuffer, const OleEntry &tableEntry);
+ bool readSectionsInfoTable(const char *headerBuffer, const OleEntry &tableEntry);
+ bool readParagraphStyleTable(const char *headerBuffer, const OleEntry &tableEntry);
+ bool readCharInfoTable(const char *headerBuffer, const OleEntry &tableEntry);
+ bool readFloatingImages(const char *headerBuffer, const OleEntry &tableEntry);
+
+private: //readPieceTable helpers methods
+ static std::string getPiecesTableBuffer(const char *headerBuffer, OleStream &tableStream);
+ static void splitPieces(const Pieces &source, Pieces &dest1, Pieces &dest2, Piece::PieceType type1, Piece::PieceType type2, int boundary);
+
+private: //formatting reader helpers methods
+ // getPrlLength returns the length in bytes of the property modifier at byteNumber;
+ // the get*Info methods walk a list of such modifiers and apply the known ones.
+ static unsigned int getPrlLength(const char *grpprlBuffer, unsigned int byteNumber);
+ static void getCharInfo(unsigned int chpxOffset, unsigned int styleId, const char *grpprlBuffer, unsigned int bytes, CharInfo &charInfo);
+ static void getStyleInfo(unsigned int papxOffset, const char *grpprlBuffer, unsigned int bytes, Style &styleInfo);
+ static void getSectionInfo(const char *grpprlBuffer, std::size_t bytes, SectionInfo &sectionInfo);
+ static bool getInlineImageInfo(unsigned int chpxOffset, const char *grpprlBuffer, unsigned int bytes, InlineImageInfo &pictureInfo);
+
+ static Style getStyleFromStylesheet(unsigned int styleId, const StyleSheet &stylesheet);
+ static int getStyleIndex(unsigned int styleId, const std::vector<bool> &isFilled, const StyleSheet &stylesheet);
+ static unsigned int getStyleIdByCharPos(unsigned int offset, const StyleInfoList &styleInfoList);
+
+ // offsetToCharPos maps a stream offset to a character position using the pieces;
+ // readToBuffer reads 'length' bytes at 'offset' of the stream into 'result'.
+ static bool offsetToCharPos(unsigned int offset, unsigned int &charPos, const Pieces &pieces);
+ static bool readToBuffer(std::string &result, unsigned int offset, std::size_t length, OleStream &stream);
+
+ static unsigned int calcCountOfPLC(unsigned int totalSize, unsigned int elementSize);
+
+private:
+ // Operand values of toggle modifiers (bold, italic), see getCharInfo.
+ enum PrlFlag {
+ UNSET = 0,
+ SET = 1,
+ UNCHANGED = 128,
+ NEGATION = 129
+ };
+
+private:
+ int myStartOfText;
+ int myEndOfText;
+
+ Pieces myPieces;
+
+ StyleSheet myStyleSheet;
+
+ CharInfoList myCharInfoList;
+ StyleInfoList myStyleInfoList;
+ SectionInfoList mySectionInfoList;
+ InlineImageInfoList myInlineImageInfoList;
+ FloatImageInfoList myFloatImageInfoList;
+
+ BookmarksList myBookmarks;
+
+ shared_ptr<OleStream> myDataStream;
+
+ // created by readFloatingImages when the document has office art data
+ shared_ptr<DocFloatImageReader> myFLoatImageReader;
+};
+
+#endif /* __OLEMAINSTREAM_H__ */
diff --git a/fbreader/src/formats/doc/OleStorage.cpp b/fbreader/src/formats/doc/OleStorage.cpp
new file mode 100644
index 0000000..a7ab81a
--- /dev/null
+++ b/fbreader/src/formats/doc/OleStorage.cpp
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLLogger.h>
+
+#include "OleStorage.h"
+#include "OleUtil.h"
+
+#include <cstring>
+
+// Size in bytes of the file header that init() reads first; the sector offsets
+// in this file are computed as BBD_BLOCK_SIZE + sectorIndex * mySectorSize.
+const std::size_t OleStorage::BBD_BLOCK_SIZE = 512;
+
+// Creates an empty storage: clear() puts every member into its initial state.
+OleStorage::OleStorage() {
+ clear();
+}
+
+// Returns the storage to the state of a freshly constructed object:
+// no stream attached, no tables loaded, no root entry known.
+void OleStorage::clear() {
+	// drop everything that was parsed from the file
+	myEntries.clear();
+	myProperties.clear();
+	mySBD.clear();
+	myBBD.clear();
+	myDIFAT.clear();
+
+	// forget the header-derived values and release the stream
+	myRootEntryIndex = -1;
+	myStreamSize = 0;
+	myShortSectorSize = 0;
+	mySectorSize = 0;
+	myInputStream = 0;
+}
+
+
+
+// Attaches the storage to a stream and parses the compound-file structures:
+// header, DIFAT, BBD, SBD, directory sectors and directory entries.
+//   stream     - input positioned anywhere; it is rewound to offset 0 here
+//   streamSize - total size of the stream in bytes, used for sanity checks
+// Returns true on success. On any failure the storage is cleared and false
+// is returned.
+bool OleStorage::init(shared_ptr<ZLInputStream> stream, std::size_t streamSize) {
+	clear();
+
+	myInputStream = stream;
+	myStreamSize = streamSize;
+	myInputStream->seek(0, true);
+
+	char oleBuf[BBD_BLOCK_SIZE];
+	std::size_t ret = myInputStream->read(oleBuf, BBD_BLOCK_SIZE);
+	if (ret != BBD_BLOCK_SIZE) {
+		clear();
+		return false;
+	}
+	static const char OLE_SIGN[] = {(char)0xD0, (char)0xCF, (char)0x11, (char)0xE0, (char)0xA1, (char)0xB1, (char)0x1A, (char)0xE1, 0};
+	//the signature is binary data, so compare raw bytes instead of C strings
+	if (std::memcmp(oleBuf, OLE_SIGN, 8) != 0) {
+		clear();
+		return false;
+	}
+
+	//Both shifts come straight from the file. Left unchecked, a shift >= 32 makes
+	//"1 << shift" undefined, and absurd sizes flow into buffer sizes and divisions
+	//further down. Sectors must hold at least one 128-byte directory record, and a
+	//short sector can never be larger than a regular one.
+	const unsigned int sectorShift = OleUtil::getU2Bytes(oleBuf, 0x1e); //offset for value of big sector size
+	const unsigned int shortSectorShift = OleUtil::getU2Bytes(oleBuf, 0x20); //offset for value of small sector size
+	if (sectorShift < 7 || sectorShift > 16 || shortSectorShift > sectorShift) {
+		ZLLogger::Instance().println("DocPlugin", "Wrong sector size value in OLE header");
+		clear();
+		return false;
+	}
+	mySectorSize = 1u << sectorShift;
+	myShortSectorSize = 1u << shortSectorShift;
+
+	if (readDIFAT(oleBuf) && readBBD(oleBuf) && readSBD(oleBuf) && readProperties(oleBuf) && readAllEntries()) {
+		return true;
+	}
+	clear();
+	return false;
+}
+
+// Fills myDIFAT with the sector numbers of the BBD (FAT) sectors.
+//   oleBuf - the BBD_BLOCK_SIZE-byte file header
+// The first records live in the header itself; further records are read from
+// a chain of DIFAT sectors whose last 4 bytes point to the next sector.
+// Returns false if an additional DIFAT sector cannot be read or is out of range.
+bool OleStorage::readDIFAT(char *oleBuf) {
+	int difatBlock = OleUtil::get4Bytes(oleBuf, 0x44); //address for first difat sector
+	int difatSectorNumbers = OleUtil::get4Bytes(oleBuf, 0x48); //numbers of additional difat records
+
+	//109 difat records (436 bytes) are stored in header, by offset 0x4c
+	for (unsigned int i = 0; i < 436; i += 4) {
+		myDIFAT.push_back(OleUtil::get4Bytes(oleBuf + 0x4c, i));
+	}
+
+	//for files > 6.78 mb we need read additional DIFAT fields
+	if (difatBlock > 0 && difatSectorNumbers > 0) {
+		if (mySectorSize < 4) {
+			//a sector must at least hold the link to the next DIFAT sector
+			ZLLogger::Instance().println("DocPlugin", "Error read DIFAT!");
+			return false;
+		}
+		//a file cannot contain more DIFAT sectors than it has sectors; this also
+		//stops a cyclic chain combined with a forged sector count
+		const std::size_t sectorsInFile = myStreamSize / mySectorSize;
+		//heap buffer: the size is taken from the file, so keep it off the stack
+		std::vector<char> buffer(mySectorSize);
+		for (int i = 0; difatBlock > 0 && i < difatSectorNumbers; ++i) {
+			ZLLogger::Instance().println("DocPlugin", "Read additional data for DIFAT");
+			if ((std::size_t)difatBlock >= sectorsInFile || (std::size_t)i >= sectorsInFile) {
+				ZLLogger::Instance().println("DocPlugin", "Error read DIFAT!");
+				return false;
+			}
+			myInputStream->seek(BBD_BLOCK_SIZE + difatBlock * mySectorSize, true);
+			if (myInputStream->read(&buffer[0], mySectorSize) != mySectorSize) {
+				ZLLogger::Instance().println("DocPlugin", "Error read DIFAT!");
+				return false;
+			}
+			for (unsigned int j = 0; j < (mySectorSize - 4); j += 4) {
+				myDIFAT.push_back(OleUtil::get4Bytes(&buffer[0], j));
+			}
+			difatBlock = OleUtil::get4Bytes(&buffer[0], mySectorSize - 4); //next DIFAT block is pointed at the end of the sector
+		}
+	}
+
+	//removing unusable DIFAT links
+	//0xFFFFFFFF means "free section"
+	while (!myDIFAT.empty() && myDIFAT.back() == (int)0xFFFFFFFF) {
+		myDIFAT.pop_back();
+	}
+	return true;
+}
+
+// Loads the Big Block Depot (FAT): reads every sector listed in myDIFAT and
+// appends its 4-byte records to myBBD.
+//   oleBuf - the BBD_BLOCK_SIZE-byte file header
+// Returns false if the header announces more FAT sectors than myDIFAT holds,
+// if a sector number is out of range, or if a sector cannot be read.
+bool OleStorage::readBBD(char *oleBuf) {
+	unsigned int bbdNumberBlocks = OleUtil::getU4Bytes(oleBuf, 0x2c); //number of big blocks
+
+	if (myDIFAT.size() < bbdNumberBlocks) {
+		//TODO maybe add check on myDIFAT == bbdNumberBlocks
+		ZLLogger::Instance().println("DocPlugin", "Wrong number of FAT blocks value");
+		return false;
+	}
+	if (mySectorSize == 0) {
+		//the range check below divides by the sector size
+		ZLLogger::Instance().println("DocPlugin", "Bad BBD entry!");
+		return false;
+	}
+
+	const int sectorsInFile = (int)(myStreamSize / mySectorSize);
+	//heap buffer: the size is taken from the file, so keep it off the stack
+	std::vector<char> buffer(mySectorSize);
+	for (unsigned int i = 0; i < bbdNumberBlocks; ++i) {
+		int bbdSector = myDIFAT.at(i);
+		if (bbdSector >= sectorsInFile || bbdSector < 0) {
+			ZLLogger::Instance().println("DocPlugin", "Bad BBD entry!");
+			return false;
+		}
+		myInputStream->seek(BBD_BLOCK_SIZE + bbdSector * mySectorSize, true);
+		if (myInputStream->read(&buffer[0], mySectorSize) != mySectorSize) {
+			ZLLogger::Instance().println("DocPlugin", "Error during reading BBD!");
+			return false;
+		}
+		for (unsigned int j = 0; j < mySectorSize; j += 4) {
+			myBBD.push_back(OleUtil::get4Bytes(&buffer[0], j));
+		}
+	}
+	return true;
+}
+
+// Loads the Small Block Depot: walks the SBD sector chain through myBBD and
+// appends the 4-byte records of every visited sector to mySBD.
+//   oleBuf - the BBD_BLOCK_SIZE-byte file header
+// A file without an SBD is not an error (returns true with mySBD empty).
+// Returns false on a broken chain or a failed read.
+bool OleStorage::readSBD(char *oleBuf) {
+	int sbdCur = OleUtil::get4Bytes(oleBuf, 0x3c); //address of first small sector
+	int sbdCount = OleUtil::get4Bytes(oleBuf, 0x40); //count of small sectors
+
+	if (sbdCur <= 0) {
+		ZLLogger::Instance().println("DocPlugin", "There's no SBD, don't read it");
+		return true;
+	}
+	if (mySectorSize == 0) {
+		ZLLogger::Instance().println("DocPlugin", "reading error during parsing SBD");
+		return false;
+	}
+
+	//heap buffer: the size is taken from the file, so keep it off the stack
+	std::vector<char> buffer(mySectorSize);
+	for (int i = 0; i < sbdCount; ++i) {
+		if (i != 0) {
+			//Every sector after the first is reached through a distinct BBD record,
+			//so a chain longer than the BBD itself can only be a cycle.
+			if ((std::size_t)i > myBBD.size() || sbdCur < 0 || (unsigned int)sbdCur >= myBBD.size()) {
+				ZLLogger::Instance().println("DocPlugin", "error during parsing SBD");
+				return false;
+			}
+			sbdCur = myBBD.at(sbdCur);
+		}
+		if (sbdCur <= 0) {
+			break;
+		}
+		myInputStream->seek(BBD_BLOCK_SIZE + sbdCur * mySectorSize, true);
+		if (myInputStream->read(&buffer[0], mySectorSize) != mySectorSize) {
+			ZLLogger::Instance().println("DocPlugin", "reading error during parsing SBD");
+			return false;
+		}
+		for (unsigned int j = 0; j < mySectorSize; j += 4) {
+			mySBD.push_back(OleUtil::get4Bytes(&buffer[0], j));
+		}
+	}
+	return true;
+}
+
+// Reads the directory sector chain and stores every 128-byte directory record
+// as a raw string in myProperties.
+//   oleBuf - the BBD_BLOCK_SIZE-byte file header
+// Returns false if the first directory sector is invalid, a sector cannot be
+// read, or the chain turns out to be cyclic.
+bool OleStorage::readProperties(char *oleBuf) {
+	int propCur = OleUtil::get4Bytes(oleBuf, 0x30); //offset for address of sector with first property
+	if (propCur < 0) {
+		ZLLogger::Instance().println("DocPlugin", "Wrong first directory sector location");
+		return false;
+	}
+	if (mySectorSize == 0) {
+		//the loop condition below divides by the sector size
+		ZLLogger::Instance().println("DocPlugin", "Error during reading properties");
+		return false;
+	}
+
+	//heap buffer: the size is taken from the file, so keep it off the stack
+	std::vector<char> buffer(mySectorSize);
+	std::size_t sectorsRead = 0;
+	do {
+		//Every sector after the first is reached through a distinct BBD record,
+		//so more than myBBD.size() + 1 sectors means the chain loops.
+		if (sectorsRead++ > myBBD.size()) {
+			ZLLogger::Instance().println("DocPlugin", "Error during reading properties");
+			return false;
+		}
+		myInputStream->seek(BBD_BLOCK_SIZE + propCur * mySectorSize, true);
+		if (myInputStream->read(&buffer[0], mySectorSize) != mySectorSize) {
+			ZLLogger::Instance().println("DocPlugin", "Error during reading properties");
+			return false;
+		}
+		//only take complete 128-byte records, never read past the buffer
+		for (unsigned int j = 0; j + 128 <= mySectorSize; j += 128) {
+			myProperties.push_back(std::string(&buffer[0] + j, 128));
+		}
+		if (propCur < 0 || (std::size_t)propCur >= myBBD.size()) {
+			break;
+		}
+		propCur = myBBD.at(propCur);
+	} while (propCur >= 0 && propCur < (int)(myStreamSize / mySectorSize));
+	return true;
+}
+
+// Converts the raw records in myProperties into OleEntry objects, in order,
+// stopping at the first record that readOleEntry() rejects. Remembers the
+// index of the root directory entry.
+// Returns true only if a root directory entry was found.
+bool OleStorage::readAllEntries() {
+	const int total = myProperties.size();
+	int index = 0;
+	while (index < total) {
+		OleEntry current;
+		if (!readOleEntry(index, current)) {
+			break;
+		}
+		if (current.type == OleEntry::ROOT_DIR) {
+			myRootEntryIndex = index;
+		}
+		myEntries.push_back(current);
+		++index;
+	}
+	return myRootEntryIndex >= 0;
+}
+
+// Decodes directory record number propNumber (from myProperties) into e:
+// type, name, length, big/small block flag and the list of blocks.
+//   propNumber - index into myProperties
+//   e          - entry to fill; its blocks list is appended to, not cleared
+// Returns false if the record is too short, has an unknown type, or carries
+// an impossible name length.
+bool OleStorage::readOleEntry(int propNumber, OleEntry &e) {
+	static const std::string ROOT_ENTRY = "Root Entry";
+
+	const std::string &property = myProperties.at(propNumber);
+
+	//all fields read below end at offset 0x78 + 4; check once, before any read
+	if (property.size() < 0x78 + 4) {
+		ZLLogger::Instance().println("DocPlugin", "problems with reading ole entry");
+		return false;
+	}
+
+	char oleType = property.at(0x42); //offset for Ole Type
+	if (oleType != 1 && oleType != 2 && oleType != 3 && oleType != 5) {
+		ZLLogger::Instance().println("DocPlugin", "entry -- not right ole type");
+		return false;
+	}
+
+	e.type = (OleEntry::Type)oleType;
+
+	int nameLength = OleUtil::getU2Bytes(property.c_str(), 0x40); //offset for value entry's name length
+	e.name.clear();
+	e.name.reserve(33); //max size of entry name
+
+	if ((unsigned int)nameLength >= property.size()) {
+		return false;
+	}
+	//the name is stored as 2-byte characters; keep the non-zero low bytes only
+	for (int i = 0; i < nameLength; i+=2) {
+		char c = property.at(i);
+		if (c != 0) {
+			e.name += c;
+		}
+	}
+
+	e.length = OleUtil::getU4Bytes(property.c_str(), 0x78); //offset for entry's length value
+	e.isBigBlock = e.length >= 0x1000 || e.name == ROOT_ENTRY;
+
+	//The entry lives either in big sectors chained through the BBD or in short
+	//sectors chained through the SBD. Pick the table once so that a big-block
+	//entry can never be continued through the SBD by accident.
+	const unsigned int sectorSize = e.isBigBlock ? mySectorSize : myShortSectorSize;
+	const std::vector<int> &table = e.isBigBlock ? myBBD : mySBD;
+	if (sectorSize == 0) {
+		ZLLogger::Instance().println("DocPlugin", "problems with reading ole entry");
+		return false;
+	}
+
+	// Read sector chain
+	int chainCur = OleUtil::get4Bytes(property.c_str(), 0x74); //offset for start block of entry
+	if (chainCur >= 0 && (chainCur <= (int)(myStreamSize / sectorSize))) {
+		//filling blocks with chains
+		do {
+			e.blocks.push_back((unsigned int)chainCur);
+			chainCur = ((std::size_t)chainCur < table.size()) ? table.at(chainCur) : -1;
+		} while (chainCur > 0 &&
+				chainCur < (int)table.size() &&
+				e.blocks.size() <= e.length / sectorSize);
+	}
+	//never report more bytes than the collected blocks can hold
+	e.length = std::min(e.length, (unsigned int)(sectorSize * e.blocks.size()));
+	return true;
+}
+
+// Computes the offset in the underlying file of block number blockNumber of
+// entry e and stores it in result.
+// Big-block entries map directly onto file sectors. Small-block entries are
+// located inside the sectors owned by the root entry.
+// Returns false (result untouched) if blockNumber is out of range or the
+// small-block data is inconsistent.
+bool OleStorage::countFileOffsetOfBlock(const OleEntry &e, unsigned int blockNumber, unsigned int &result) const {
+	//TODO maybe better syntax can be used?
+	if (e.blocks.size() <= (std::size_t)blockNumber) {
+		ZLLogger::Instance().println("DocPlugin", "countFileOffsetOfBlock can't be done, blockNumber is invalid");
+		return false;
+	}
+	const unsigned int block = e.blocks.at(blockNumber);
+	if (e.isBigBlock) {
+		result = BBD_BLOCK_SIZE + block * mySectorSize;
+		return true;
+	}
+
+	//Both sizes originate from the file. A zero short sector size, or one larger
+	//than the sector size, would make the divisions below divide by zero.
+	if (myShortSectorSize == 0 || myShortSectorSize > mySectorSize) {
+		ZLLogger::Instance().println("DocPlugin", "countFileOffsetOfBlock can't be done, invalid sbd data");
+		return false;
+	}
+	if (myRootEntryIndex < 0 || (std::size_t)myRootEntryIndex >= myEntries.size()) {
+		ZLLogger::Instance().println("DocPlugin", "countFileOffsetOfBlock can't be done, invalid sbd data");
+		return false;
+	}
+
+	const OleEntry::Blocks &rootBlocks = myEntries.at(myRootEntryIndex).blocks;
+	const unsigned int sbdPerSector = mySectorSize / myShortSectorSize;
+	const unsigned int sbdSectorNumber = block / sbdPerSector;
+	const unsigned int sbdSectorMod = block % sbdPerSector;
+	if (rootBlocks.size() <= (std::size_t)sbdSectorNumber) {
+		ZLLogger::Instance().println("DocPlugin", "countFileOffsetOfBlock can't be done, invalid sbd data");
+		return false;
+	}
+	result = BBD_BLOCK_SIZE + rootBlocks.at(sbdSectorNumber) * mySectorSize + sbdSectorMod * myShortSectorSize;
+	return true;
+}
+
+// Looks up an entry by name. When several entries share the name, the one
+// with the greatest length wins (the last one among equals).
+// returnEntry is overwritten with each candidate found.
+// Returns true only if the chosen entry has a non-zero length.
+bool OleStorage::getEntryByName(std::string name, OleEntry &returnEntry) const {
+	//TODO fix the workaround for duplicates streams: now it takes a stream with max length
+	unsigned int bestLength = 0;
+	for (std::vector<OleEntry>::const_iterator it = myEntries.begin(); it != myEntries.end(); ++it) {
+		if (it->name != name || it->length < bestLength) {
+			continue;
+		}
+		returnEntry = *it;
+		bestLength = it->length;
+	}
+	return bestLength != 0;
+}
+
+
diff --git a/fbreader/src/formats/doc/OleStorage.h b/fbreader/src/formats/doc/OleStorage.h
new file mode 100644
index 0000000..584ee94
--- /dev/null
+++ b/fbreader/src/formats/doc/OleStorage.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OLESTORAGE_H__
+#define __OLESTORAGE_H__
+
+#include <algorithm>
+#include <vector>
+#include <string>
+
+#include <ZLInputStream.h>
+
+// One directory entry of an OLE compound file.
+struct OleEntry {
+	// Entry kinds; the numeric values are the type codes stored in the file.
+	enum Type {
+		DIR = 1,
+		STREAM = 2,
+		LOCK_BYTES = 3,
+		ROOT_DIR = 5
+	};
+
+	// Block numbers that make up the entry, in chain order.
+	typedef std::vector<unsigned int> Blocks;
+
+	std::string name;     // entry name
+	unsigned int length;  // entry length in bytes
+	Type type;            // kind of entry
+	Blocks blocks;        // blocks holding the entry data
+	bool isBigBlock;      // true: blocks are regular sectors, false: short sectors
+};
+
+// Parser for the container structure of an OLE compound file: reads the
+// header, the allocation tables and the directory, and exposes the directory
+// entries together with the means to locate their blocks in the file.
+class OleStorage {
+
+public:
+ // Size in bytes of the file header; sector offsets are counted from its end.
+ static const std::size_t BBD_BLOCK_SIZE;
+
+public:
+ OleStorage();
+ // Parses the file behind the stream; returns false and clears the storage on failure.
+ bool init(shared_ptr<ZLInputStream>, std::size_t streamSize);
+ // Drops the stream and all parsed data.
+ void clear();
+ const std::vector<OleEntry> &getEntries() const;
+ // Copies the longest entry with the given name into entry;
+ // returns false if no such entry with a non-zero length exists.
+ bool getEntryByName(std::string name, OleEntry &entry) const;
+
+ unsigned int getSectorSize() const;
+ unsigned int getShortSectorSize() const;
+
+public: //TODO make private
+ // Computes the file offset of block number blockNumber of entry e; false if it cannot be located.
+ bool countFileOffsetOfBlock(const OleEntry &e, unsigned int blockNumber, unsigned int &result) const;
+
+private:
+ // Loaders for the individual structures; oleBuf is the file header read by init().
+ bool readDIFAT(char *oleBuf);
+ bool readBBD(char *oleBuf);
+ bool readSBD(char *oleBuf);
+ bool readProperties(char *oleBuf);
+
+ // Turn the raw directory records in myProperties into myEntries.
+ bool readAllEntries();
+ bool readOleEntry(int propNumber, OleEntry &entry);
+
+private:
+
+ shared_ptr<ZLInputStream> myInputStream;
+ unsigned int mySectorSize, myShortSectorSize; // sizes in bytes, derived from the header
+
+ std::size_t myStreamSize; // total size of the input, as passed to init()
+ std::vector<int> myDIFAT; //double-indirect file allocation table
+ std::vector<int> myBBD; //Big Block Depot
+ std::vector<int> mySBD; //Small Block Depot
+ std::vector<std::string> myProperties; // raw 128-byte directory records
+ std::vector<OleEntry> myEntries; // decoded directory entries
+ int myRootEntryIndex; // index of the root entry in myEntries, -1 if unknown
+
+};
+
+// All directory entries decoded by init().
+inline const std::vector<OleEntry> &OleStorage::getEntries() const {
+	return myEntries;
+}
+
+// Size of a regular sector in bytes.
+inline unsigned int OleStorage::getSectorSize() const {
+	return mySectorSize;
+}
+
+// Size of a short sector in bytes.
+inline unsigned int OleStorage::getShortSectorSize() const {
+	return myShortSectorSize;
+}
+
+#endif /* __OLESTORAGE_H__ */
diff --git a/fbreader/src/formats/doc/OleStream.cpp b/fbreader/src/formats/doc/OleStream.cpp
new file mode 100644
index 0000000..8de1cc4
--- /dev/null
+++ b/fbreader/src/formats/doc/OleStream.cpp
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLLogger.h>
+
+#include "OleStream.h"
+#include "OleUtil.h"
+
+// Binds a stream view to one directory entry of a storage; reading starts at
+// offset 0 of that entry.
+OleStream::OleStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream) :
+	myStorage(storage),
+	myOleEntry(oleEntry),
+	myBaseStream(stream),
+	myOleOffset(0) {
+}
+
+
+// Succeeds only when the bound entry is a stream entry; nothing is opened.
+bool OleStream::open() {
+	return myOleEntry.type == OleEntry::STREAM;
+}
+
+// Reads up to maxSize bytes from the current position of the entry into
+// buffer, following the entry's block list, and advances the position by the
+// number of bytes actually read.
+// Returns the number of bytes read; 0 if the position lies outside the block
+// list or the first block cannot be located in the file.
+std::size_t OleStream::read(char *buffer, std::size_t maxSize) {
+ std::size_t length = maxSize;
+ std::size_t readedBytes = 0;
+ std::size_t bytesLeftInCurBlock;
+ unsigned int newFileOffset;
+
+ unsigned int curBlockNumber, modBlock;
+ std::size_t toReadBlocks, toReadBytes;
+
+ // never read past the end of the entry
+ if (myOleOffset + length > myOleEntry.length) {
+ length = myOleEntry.length - myOleOffset;
+ }
+
+ std::size_t sectorSize = (std::size_t)(myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize());
+
+ curBlockNumber = myOleOffset / sectorSize;
+ if (curBlockNumber >= myOleEntry.blocks.size()) {
+ return 0;
+ }
+ // split the request into: rest of the current block, a number of whole
+ // blocks, and the leading bytes of one more block
+ modBlock = myOleOffset % sectorSize;
+ bytesLeftInCurBlock = sectorSize - modBlock;
+ if (bytesLeftInCurBlock < length) {
+ toReadBlocks = (length - bytesLeftInCurBlock) / sectorSize;
+ toReadBytes = (length - bytesLeftInCurBlock) % sectorSize;
+ } else {
+ toReadBlocks = toReadBytes = 0;
+ }
+
+ if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) {
+ return 0;
+ }
+ newFileOffset += modBlock;
+
+ myBaseStream->seek(newFileOffset, true);
+
+ // part 1: rest of the current block
+ readedBytes = myBaseStream->read(buffer, std::min(length, bytesLeftInCurBlock));
+ // part 2: whole blocks; each one may live anywhere in the file, so seek every time
+ for (std::size_t i = 0; i < toReadBlocks; ++i) {
+ if (++curBlockNumber >= myOleEntry.blocks.size()) {
+ break;
+ }
+ if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) {
+ return readedBytes;
+ }
+ myBaseStream->seek(newFileOffset, true);
+ readedBytes += myBaseStream->read(buffer + readedBytes, std::min(length - readedBytes, sectorSize));
+ }
+ // part 3: leading bytes of the block after the last whole one
+ if (toReadBytes > 0 && ++curBlockNumber < myOleEntry.blocks.size()) {
+ if (!myStorage->countFileOffsetOfBlock(myOleEntry, curBlockNumber, newFileOffset)) {
+ return readedBytes;
+ }
+ myBaseStream->seek(newFileOffset, true);
+ readedBytes += myBaseStream->read(buffer + readedBytes, toReadBytes);
+ }
+ myOleOffset += readedBytes;
+ return readedBytes;
+}
+
+// True once the position has reached (or passed) the length of the entry.
+bool OleStream::eof() const {
+	return myOleOffset >= myOleEntry.length;
+}
+
+
+// Intentionally does nothing: this class holds no state that needs closing here.
+void OleStream::close() {
+}
+
+// Moves the position inside the entry and positions the base stream on the
+// matching byte of the file.
+//   offset         - target position, or forward distance from the current one
+//   absoluteOffset - true: offset counts from the start of the entry
+// The target is clamped to the entry length. Returns false, leaving the
+// position unchanged, if the target falls outside the block list or the
+// block cannot be located.
+bool OleStream::seek(unsigned int offset, bool absoluteOffset) {
+	const unsigned int requested = absoluteOffset ? offset : myOleOffset + offset;
+	const unsigned int target = std::min(requested, myOleEntry.length);
+
+	const unsigned int blockSize = myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize();
+	const unsigned int blockIndex = target / blockSize;
+	if (blockIndex >= myOleEntry.blocks.size()) {
+		return false;
+	}
+
+	unsigned int physicalOffset = 0;
+	if (!myStorage->countFileOffsetOfBlock(myOleEntry, blockIndex, physicalOffset)) {
+		return false;
+	}
+	physicalOffset += target % blockSize;
+
+	myBaseStream->seek(physicalOffset, true);
+	myOleOffset = target;
+	return true;
+}
+
+// Current position inside the entry, in bytes from its start.
+std::size_t OleStream::offset() {
+ return myOleOffset;
+}
+
+// Translates the range [offset, offset + size) of the entry into a list of
+// (file offset, length) pieces, one per touched block, then merges pieces
+// that are adjacent in the file.
+// Returns an empty list if offset lies outside the block list or a block
+// cannot be located. The list is cut short if the block list ends before
+// size bytes are covered.
+ZLFileImage::Blocks OleStream::getBlockPieceInfoList(unsigned int offset, unsigned int size) const {
+	ZLFileImage::Blocks pieces;
+	const unsigned int blockSize = (myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize());
+	unsigned int blockIndex = offset / blockSize;
+	if (blockIndex >= myOleEntry.blocks.size()) {
+		return pieces;
+	}
+
+	unsigned int pieceOffset = 0;
+	if (!myStorage->countFileOffsetOfBlock(myOleEntry, blockIndex, pieceOffset)) {
+		return ZLFileImage::Blocks();
+	}
+
+	// first piece: from offset to the end of its block (or less, if size is small)
+	const unsigned int shiftInBlock = offset % blockSize;
+	const unsigned int firstLength = std::min(size, blockSize - shiftInBlock);
+	pieces.push_back(ZLFileImage::Block(pieceOffset + shiftInBlock, firstLength));
+
+	// following pieces: one per block, full blocks first, the remainder last
+	unsigned int remaining = size - firstLength;
+	while (remaining > 0 && ++blockIndex < myOleEntry.blocks.size()) {
+		pieceOffset = 0;
+		if (!myStorage->countFileOffsetOfBlock(myOleEntry, blockIndex, pieceOffset)) {
+			return ZLFileImage::Blocks();
+		}
+		const unsigned int pieceLength = std::min(remaining, blockSize);
+		pieces.push_back(ZLFileImage::Block(pieceOffset, pieceLength));
+		remaining -= pieceLength;
+	}
+
+	return concatBlocks(pieces);
+}
+
+// Merges neighbouring pieces that are contiguous in the file (a piece that
+// starts exactly where the previous one ends) into a single piece.
+// Lists with fewer than two pieces are returned unchanged.
+ZLFileImage::Blocks OleStream::concatBlocks(const ZLFileImage::Blocks &blocks) {
+	if (blocks.size() < 2) {
+		return blocks;
+	}
+	ZLFileImage::Blocks merged;
+	ZLFileImage::Block pending = blocks.at(0);
+	for (std::size_t i = 1; i < blocks.size(); ++i) {
+		const ZLFileImage::Block next = blocks.at(i);
+		const unsigned int expectedOffset = pending.offset + pending.size;
+		if (next.offset == expectedOffset) {
+			// contiguous: grow the pending piece
+			pending.size += next.size;
+			continue;
+		}
+		// gap: emit what was collected and start over
+		merged.push_back(pending);
+		pending = next;
+	}
+	merged.push_back(pending);
+	return merged;
+}
+
+// Offset in the underlying file that corresponds to the current position of
+// the entry. Returns 0 if the position is outside the block list or the
+// block cannot be located.
+std::size_t OleStream::fileOffset() {
+	//TODO maybe remove this method, it doesn't use at this time
+	const std::size_t blockSize = (std::size_t)(myOleEntry.isBigBlock ? myStorage->getSectorSize() : myStorage->getShortSectorSize());
+	const unsigned int blockIndex = myOleOffset / blockSize;
+	if (blockIndex >= myOleEntry.blocks.size()) {
+		return 0;
+	}
+	unsigned int blockStart = 0;
+	if (!myStorage->countFileOffsetOfBlock(myOleEntry, blockIndex, blockStart)) {
+		return 0; //TODO maybe remove -1?
+	}
+	const unsigned int shiftInBlock = myOleOffset % blockSize;
+	return blockStart + shiftInBlock;
+}
diff --git a/fbreader/src/formats/doc/OleStream.h b/fbreader/src/formats/doc/OleStream.h
new file mode 100644
index 0000000..861c7cb
--- /dev/null
+++ b/fbreader/src/formats/doc/OleStream.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OLESTREAM_H__
+#define __OLESTREAM_H__
+
+#include <ZLFileImage.h>
+
+#include "OleStorage.h"
+
+// Sequential/seekable view of one directory entry of an OleStorage, read
+// through the storage's base input stream.
+class OleStream {
+
+public:
+ OleStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream);
+
+public:
+ // Returns true only if the entry is of STREAM type.
+ bool open();
+ // Reads up to maxSize bytes at the current position; returns the number of bytes read.
+ std::size_t read(char *buffer, std::size_t maxSize);
+ void close();
+
+public:
+ // Moves the position (clamped to the entry length); false if it cannot be mapped to a block.
+ bool seek(unsigned int offset, bool absoluteOffset);
+ // Current position inside the entry.
+ std::size_t offset();
+
+public:
+ // Maps a byte range of the entry onto (file offset, length) pieces.
+ ZLFileImage::Blocks getBlockPieceInfoList(unsigned int offset, unsigned int size) const;
+ // Merges pieces that are adjacent in the file.
+ static ZLFileImage::Blocks concatBlocks(const ZLFileImage::Blocks &blocks);
+ // File offset matching the current position; 0 if it cannot be determined.
+ std::size_t fileOffset();
+
+public:
+ // True when the position has reached the entry length.
+ bool eof() const;
+
+protected:
+ shared_ptr<OleStorage> myStorage; // storage used to locate blocks in the file
+
+ OleEntry myOleEntry; // the entry this stream reads
+ shared_ptr<ZLInputStream> myBaseStream; // stream over the whole file
+
+ unsigned int myOleOffset; // current position inside the entry
+};
+
+#endif /* __OLESTREAM_H__ */
diff --git a/fbreader/src/formats/doc/OleStreamParser.cpp b/fbreader/src/formats/doc/OleStreamParser.cpp
new file mode 100644
index 0000000..0a9c62d
--- /dev/null
+++ b/fbreader/src/formats/doc/OleStreamParser.cpp
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+//#include <cctype>
+//#include <cstring>
+
+#include <ZLLogger.h>
+
+#include "OleMainStream.h"
+#include "OleUtil.h"
+#include "OleStreamParser.h"
+
+//word's control chars:
+// Character code constants. readStream() and getUcs2Char() compare the
+// characters of the text stream against several of these.
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_FOOTNOTE_MARK = 0x0002;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_TABLE_SEPARATOR = 0x0007;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HORIZONTAL_TAB = 0x0009;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_HARD_LINEBREAK = 0x000b;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_PAGE_BREAK = 0x000c;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_OF_PARAGRAPH = 0x000d;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_MINUS = 0x001e;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SOFT_HYPHEN = 0x001f;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_START_FIELD = 0x0013;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_SEPARATOR_FIELD = 0x0014;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_END_FIELD = 0x0015;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::WORD_ZERO_WIDTH_UNBREAKABLE_SPACE = 0xfeff;
+// placeholders in the text at which getUcs2Char() looks up image data
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::INLINE_IMAGE = 0x0001;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::FLOAT_IMAGE = 0x0008;
+
+//unicode values:
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::NULL_SYMBOL = 0x0;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::FILE_SEPARATOR = 0x1c;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::LINE_FEED = 0x000a;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::SOFT_HYPHEN = 0xad;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::SPACE = 0x20;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::MINUS = 0x2D;
+const ZLUnicodeUtil::Ucs2Char OleStreamParser::VERTICAL_LINE = 0x7C;
+
+// Starts parsing at character position 0 with every lookup cursor (styles,
+// char infos, bookmarks, images) at the beginning of its list.
+OleStreamParser::OleStreamParser() {
+	// cursors into the per-document info lists
+	myNextFloatImageInfoIndex = 0;
+	myNextInlineImageInfoIndex = 0;
+	myNextBookmarkIndex = 0;
+	myNextCharInfoIndex = 0;
+	myNextStyleInfoIndex = 0;
+
+	// position in the text and in the current buffer
+	myCurCharPos = 0;
+	myCurBufferPosition = 0;
+}
+
+// Main parsing loop: pulls characters one by one and dispatches each to the
+// matching handle*() callback. Always returns true once the input is exhausted.
+bool OleStreamParser::readStream(OleMainStream &oleMainStream) {
+	ZLUnicodeUtil::Ucs2Char symbol;
+	// Set after a table separator: the next character decides whether it was
+	// a plain cell separator or (when followed by another one) an end of row.
+	bool separatorPending = false;
+	while (getUcs2Char(oleMainStream, symbol)) {
+		if (separatorPending) {
+			separatorPending = false;
+			if (symbol == WORD_TABLE_SEPARATOR) {
+				handleTableEndRow();
+				continue;
+			}
+			handleTableSeparator();
+		}
+
+		if (symbol == WORD_ZERO_WIDTH_UNBREAKABLE_SPACE) {
+			continue; //skip
+		}
+		if (symbol >= 32) {
+			handleChar(symbol);
+			continue;
+		}
+
+		// control characters
+		switch (symbol) {
+			case NULL_SYMBOL:
+			case INLINE_IMAGE:
+			case FLOAT_IMAGE:
+				// nothing to do here
+				break;
+			case WORD_HARD_LINEBREAK:
+				handleHardLinebreak();
+				break;
+			case WORD_END_OF_PARAGRAPH:
+			case WORD_PAGE_BREAK:
+				handleParagraphEnd();
+				break;
+			case WORD_TABLE_SEPARATOR:
+				separatorPending = true;
+				break;
+			case WORD_FOOTNOTE_MARK:
+				handleFootNoteMark();
+				break;
+			case WORD_START_FIELD:
+				handleStartField();
+				break;
+			case WORD_SEPARATOR_FIELD:
+				handleSeparatorField();
+				break;
+			case WORD_END_FIELD:
+				handleEndField();
+				break;
+			default:
+				handleOtherControlChar(symbol);
+				break;
+		}
+	}
+
+	return true;
+}
+
+// Delivers the next character of the text in ucs2char, refilling the buffer
+// piece by piece as needed. Before returning it fires the style/bookmark
+// callbacks registered for the current character position and, for image
+// placeholder characters, the image lookup. Then the position is advanced.
+// Returns false when no further piece can be read.
+bool OleStreamParser::getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char) {
+	// refill until the buffer holds at least one unread character
+	while (myCurBufferPosition >= myBuffer.size()) {
+		myBuffer.clear();
+		myCurBufferPosition = 0;
+		if (!readNextPiece(stream)) {
+			return false;
+		}
+	}
+
+	ucs2char = myBuffer.at(myCurBufferPosition);
+	++myCurBufferPosition;
+	processStyles(stream);
+
+	if (ucs2char == INLINE_IMAGE) {
+		processInlineImage(stream);
+	} else if (ucs2char == FLOAT_IMAGE) {
+		processFloatImage(stream);
+	}
+
+	++myCurCharPos;
+	return true;
+}
+
+// Emits the inline image(s) registered for the current character position.
+// Entries registered for earlier positions are skipped; an entry whose image
+// data yields no blocks is silently ignored.
+void OleStreamParser::processInlineImage(OleMainStream &stream) {
+	const OleMainStream::InlineImageInfoList &infos = stream.getInlineImageInfoList();
+	const std::size_t count = infos.size();
+
+	//seek to curCharPos, because not all entries are real pictures
+	for (; myNextInlineImageInfoIndex < count; ++myNextInlineImageInfoIndex) {
+		if (!(infos.at(myNextInlineImageInfoIndex).first < myCurCharPos)) {
+			break;
+		}
+	}
+
+	for (; myNextInlineImageInfoIndex < count; ++myNextInlineImageInfoIndex) {
+		if (!(infos.at(myNextInlineImageInfoIndex).first == myCurCharPos)) {
+			break;
+		}
+		OleMainStream::InlineImageInfo current = infos.at(myNextInlineImageInfoIndex).second;
+		ZLFileImage::Blocks imageBlocks = stream.getInlineImage(current.DataPosition);
+		if (!imageBlocks.empty()) {
+			handleImage(imageBlocks);
+		}
+	}
+}
+
+// Emits the floating image(s) registered for the current character position.
+// Entries registered for earlier positions are skipped; an entry whose shape
+// yields no blocks is silently ignored.
+void OleStreamParser::processFloatImage(OleMainStream &stream) {
+	const OleMainStream::FloatImageInfoList &infos = stream.getFloatImageInfoList();
+	const std::size_t count = infos.size();
+
+	//seek to curCharPos, because not all entries are real pictures
+	for (; myNextFloatImageInfoIndex < count; ++myNextFloatImageInfoIndex) {
+		if (!(infos.at(myNextFloatImageInfoIndex).first < myCurCharPos)) {
+			break;
+		}
+	}
+
+	for (; myNextFloatImageInfoIndex < count; ++myNextFloatImageInfoIndex) {
+		if (!(infos.at(myNextFloatImageInfoIndex).first == myCurCharPos)) {
+			break;
+		}
+		OleMainStream::FloatImageInfo current = infos.at(myNextFloatImageInfoIndex).second;
+		ZLFileImage::Blocks imageBlocks = stream.getFloatImage(current.ShapeId);
+		if (!imageBlocks.empty()) {
+			handleImage(imageBlocks);
+		}
+	}
+}
+
+// Fires, in this order, the paragraph-style, font-style and bookmark
+// callbacks for every list entry registered exactly at the current character
+// position. Each list has its own cursor, which only moves forward.
+void OleStreamParser::processStyles(OleMainStream &stream) {
+	// paragraph styles
+	const OleMainStream::StyleInfoList &paragraphStyles = stream.getStyleInfoList();
+	for (; myNextStyleInfoIndex < paragraphStyles.size(); ++myNextStyleInfoIndex) {
+		if (!(paragraphStyles.at(myNextStyleInfoIndex).first == myCurCharPos)) {
+			break;
+		}
+		OleMainStream::Style style = paragraphStyles.at(myNextStyleInfoIndex).second;
+		handleParagraphStyle(style);
+	}
+
+	// character (font) styles
+	const OleMainStream::CharInfoList &charInfos = stream.getCharInfoList();
+	for (; myNextCharInfoIndex < charInfos.size(); ++myNextCharInfoIndex) {
+		if (!(charInfos.at(myNextCharInfoIndex).first == myCurCharPos)) {
+			break;
+		}
+		OleMainStream::CharInfo charInfo = charInfos.at(myNextCharInfoIndex).second;
+		handleFontStyle(charInfo.FontStyle);
+	}
+
+	// bookmarks
+	const OleMainStream::BookmarksList &bookmarks = stream.getBookmarks();
+	for (; myNextBookmarkIndex < bookmarks.size(); ++myNextBookmarkIndex) {
+		if (!(bookmarks.at(myNextBookmarkIndex).CharPosition == myCurCharPos)) {
+			break;
+		}
+		OleMainStream::Bookmark mark = bookmarks.at(myNextBookmarkIndex);
+		handleBookmark(mark.Name);
+	}
+}
diff --git a/fbreader/src/formats/doc/OleStreamParser.h b/fbreader/src/formats/doc/OleStreamParser.h
new file mode 100644
index 0000000..1adec2f
--- /dev/null
+++ b/fbreader/src/formats/doc/OleStreamParser.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OLESTREAMPARSER_H__
+#define __OLESTREAMPARSER_H__
+
+#include <ZLUnicodeUtil.h>
+
+#include "OleMainStream.h"
+#include "OleStreamReader.h"
+
+// Abstract parser layered on top of OleStreamReader. It declares the Word
+// control-character constants and a set of pure virtual handle*() callbacks
+// that a concrete reader has to implement.
+// NOTE(review): the constant values, the constructor and readStream() are
+// defined in OleStreamParser.cpp; presumably readStream() walks the decoded
+// text and dispatches to the callbacks below -- confirm there.
+class OleStreamParser : public OleStreamReader {
+
+public:
+ //word's control chars:
+ static const ZLUnicodeUtil::Ucs2Char WORD_FOOTNOTE_MARK;
+ static const ZLUnicodeUtil::Ucs2Char WORD_TABLE_SEPARATOR;
+ static const ZLUnicodeUtil::Ucs2Char WORD_HORIZONTAL_TAB;
+ static const ZLUnicodeUtil::Ucs2Char WORD_HARD_LINEBREAK;
+ static const ZLUnicodeUtil::Ucs2Char WORD_PAGE_BREAK;
+ static const ZLUnicodeUtil::Ucs2Char WORD_END_OF_PARAGRAPH;
+ static const ZLUnicodeUtil::Ucs2Char WORD_MINUS;
+ static const ZLUnicodeUtil::Ucs2Char WORD_SOFT_HYPHEN;
+ static const ZLUnicodeUtil::Ucs2Char WORD_START_FIELD;
+ static const ZLUnicodeUtil::Ucs2Char WORD_SEPARATOR_FIELD;
+ static const ZLUnicodeUtil::Ucs2Char WORD_END_FIELD;
+ static const ZLUnicodeUtil::Ucs2Char WORD_ZERO_WIDTH_UNBREAKABLE_SPACE;
+ static const ZLUnicodeUtil::Ucs2Char INLINE_IMAGE;
+ static const ZLUnicodeUtil::Ucs2Char FLOAT_IMAGE;
+
+ //unicode values:
+ static const ZLUnicodeUtil::Ucs2Char NULL_SYMBOL;
+ static const ZLUnicodeUtil::Ucs2Char FILE_SEPARATOR;
+ static const ZLUnicodeUtil::Ucs2Char LINE_FEED;
+ static const ZLUnicodeUtil::Ucs2Char SOFT_HYPHEN;
+ static const ZLUnicodeUtil::Ucs2Char SPACE;
+ static const ZLUnicodeUtil::Ucs2Char MINUS;
+ static const ZLUnicodeUtil::Ucs2Char VERTICAL_LINE;
+
+public:
+ OleStreamParser();
+
+private:
+ // Implements the pure virtual OleStreamReader::readStream(); kept private so
+ // that it is only reached through OleStreamReader::readDocument().
+ bool readStream(OleMainStream &stream);
+
+protected:
+ // Text and control-character callbacks to be implemented by subclasses.
+ virtual void handleChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
+ virtual void handleHardLinebreak() = 0;
+ virtual void handleParagraphEnd() = 0;
+ virtual void handlePageBreak() = 0;
+ virtual void handleTableSeparator() = 0;
+ virtual void handleTableEndRow() = 0;
+ virtual void handleFootNoteMark() = 0;
+ virtual void handleStartField() = 0;
+ virtual void handleSeparatorField() = 0;
+ virtual void handleEndField() = 0;
+ // Receives the data blocks of an image; processFloatImage() calls it only
+ // when the block list returned for the shape is not empty.
+ virtual void handleImage(const ZLFileImage::Blocks &blocks) = 0;
+ virtual void handleOtherControlChar(ZLUnicodeUtil::Ucs2Char ucs2char) = 0;
+
+ // Formatting callbacks; processStyles() invokes them for every list entry
+ // anchored at the current character position (font style first receives
+ // CharInfo::FontStyle, bookmark receives Bookmark::Name).
+ virtual void handleFontStyle(unsigned int fontStyle) = 0;
+ virtual void handleParagraphStyle(const OleMainStream::Style &styleInfo) = 0;
+ virtual void handleBookmark(const std::string &name) = 0;
+
+private:
+ bool getUcs2Char(OleMainStream &stream, ZLUnicodeUtil::Ucs2Char &ucs2char);
+ void processInlineImage(OleMainStream &stream);
+ void processFloatImage(OleMainStream &stream);
+ void processStyles(OleMainStream &stream);
+
+private:
+protected:
+ // UCS-2 text buffer; protected, so subclasses may access it directly.
+ ZLUnicodeUtil::Ucs2String myBuffer;
+private:
+ // presumably the read cursor inside myBuffer -- confirm in OleStreamParser.cpp
+ std::size_t myCurBufferPosition;
+
+ // Character position that processStyles()/processFloatImage() compare with
+ // the positions stored in the formatting and image lists.
+ unsigned int myCurCharPos;
+
+ // Cursors into the corresponding OleMainStream lists: index of the next
+ // entry that has not been reported yet.
+ std::size_t myNextStyleInfoIndex;
+ std::size_t myNextCharInfoIndex;
+ std::size_t myNextBookmarkIndex;
+ std::size_t myNextInlineImageInfoIndex;
+ std::size_t myNextFloatImageInfoIndex;
+};
+
+#endif /* __OLESTREAMPARSER_H__ */
diff --git a/fbreader/src/formats/doc/OleStreamReader.cpp b/fbreader/src/formats/doc/OleStreamReader.cpp
new file mode 100644
index 0000000..224489a
--- /dev/null
+++ b/fbreader/src/formats/doc/OleStreamReader.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLLogger.h>
+
+#include "OleMainStream.h"
+#include "OleUtil.h"
+#include "OleStreamReader.h"
+
+// A fresh reader has consumed no piece yet, so readNextPiece() starts with
+// piece number 0.
+OleStreamReader::OleStreamReader()
+	: myNextPieceNumber(0) {
+}
+
+// Opens inputStream as an OLE storage, looks up its "WordDocument" entry,
+// opens that entry as an OleMainStream and hands it to readStream().
+//
+// doReadFormattingData is forwarded unchanged to OleMainStream::open().
+//
+// Returns the result of readStream(), or false when the storage cannot be
+// initialised (logged), the "WordDocument" entry is missing (not logged) or
+// the main stream cannot be opened (logged).
+bool OleStreamReader::readDocument(shared_ptr<ZLInputStream> inputStream, bool doReadFormattingData) {
+	static const std::string WORD_DOCUMENT = "WordDocument";
+
+	shared_ptr<OleStorage> oleStorage = new OleStorage;
+	const bool storageReady = oleStorage->init(inputStream, inputStream->sizeOfOpened());
+	if (!storageReady) {
+		ZLLogger::Instance().println("DocPlugin", "Broken OLE file");
+		return false;
+	}
+
+	OleEntry mainEntry;
+	const bool entryFound = oleStorage->getEntryByName(WORD_DOCUMENT, mainEntry);
+	if (!entryFound) {
+		return false;
+	}
+
+	OleMainStream mainStream(oleStorage, mainEntry, inputStream);
+	if (mainStream.open(doReadFormattingData)) {
+		return readStream(mainStream);
+	}
+	ZLLogger::Instance().println("DocPlugin", "Cannot open OleMainStream");
+	return false;
+}
+
+// Reads the next text piece of the main stream and feeds it to the handlers.
+//
+// The piece at index myNextPieceNumber is looked up in stream.getPieces():
+//  - a PIECE_FOOTNOTE piece triggers footnotesStartHandler() before its text
+//    is read;
+//  - a PIECE_OTHER piece stops reading (false is returned, the piece counter
+//    is left untouched).
+// The piece text is then read from piece.Offset; non-ANSI pieces are passed
+// to ucs2SymbolHandler() one little-endian 16-bit unit at a time, ANSI
+// pieces are passed as a whole to ansiDataHandler().
+//
+// Returns true when a piece was consumed (myNextPieceNumber is advanced),
+// false when there is no further piece, the piece is PIECE_OTHER, or seeking
+// to the piece failed.
+bool OleStreamReader::readNextPiece(OleMainStream &stream) {
+	const OleMainStream::Pieces &pieces = stream.getPieces();
+	if (myNextPieceNumber >= pieces.size()) {
+		return false;
+	}
+	const OleMainStream::Piece &piece = pieces.at(myNextPieceNumber);
+
+	if (piece.Type == OleMainStream::Piece::PIECE_FOOTNOTE) {
+		footnotesStartHandler();
+	} else if (piece.Type == OleMainStream::Piece::PIECE_OTHER) {
+		return false;
+	}
+
+	if (!stream.seek(piece.Offset, true)) {
+		//TODO maybe in that case we should take next piece?
+		return false;
+	}
+	char *textBuffer = new char[piece.Length];
+	std::size_t readBytes = stream.read(textBuffer, piece.Length);
+	if (readBytes != (std::size_t)piece.Length) {
+		// A short read is only logged; the bytes that were read are still processed.
+		ZLLogger::Instance().println("DocPlugin", "not all bytes have been read from piece");
+	}
+
+	if (!piece.IsANSI) {
+		// Every UCS-2 unit needs two bytes. Requiring i + 1 < readBytes drops a
+		// dangling last byte of an odd-sized read instead of letting
+		// getU2Bytes() read textBuffer[readBytes], which lies past the data
+		// that was read (and past the allocation when the piece was read fully).
+		for (std::size_t i = 0; i + 1 < readBytes; i += 2) {
+			ucs2SymbolHandler(OleUtil::getU2Bytes(textBuffer, i));
+		}
+	} else {
+		ansiDataHandler(textBuffer, readBytes);
+	}
+	++myNextPieceNumber;
+	delete[] textBuffer;
+
+	return true;
+}
diff --git a/fbreader/src/formats/doc/OleStreamReader.h b/fbreader/src/formats/doc/OleStreamReader.h
new file mode 100644
index 0000000..2d2a0ae
--- /dev/null
+++ b/fbreader/src/formats/doc/OleStreamReader.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OLESTREAMREADER_H__
+#define __OLESTREAMREADER_H__
+
+#include <ZLUnicodeUtil.h>
+
+#include "OleMainStream.h"
+
+// Base class for readers of the "WordDocument" stream of an OLE file.
+// readDocument() opens the storage and the main stream and then calls
+// readStream(), which subclasses implement; readNextPiece() delivers the
+// text of one piece at a time through the three handler callbacks.
+class OleStreamReader {
+
+public:
+	OleStreamReader();
+	// The class is a polymorphic base (it declares pure virtual functions), so
+	// the destructor is virtual to make destruction through a base pointer
+	// well defined.
+	virtual ~OleStreamReader() {}
+
+	// Opens the document contained in stream and runs readStream() on it.
+	// Returns false when the OLE storage or the main stream cannot be opened,
+	// otherwise the result of readStream().
+	bool readDocument(shared_ptr<ZLInputStream> stream, bool doReadFormattingData);
+
+protected:
+	// Processes the opened main stream; called by readDocument().
+	virtual bool readStream(OleMainStream &stream) = 0;
+
+	// Reads the next piece of the stream and reports its content through the
+	// handlers below. Returns false when no piece was consumed.
+	bool readNextPiece(OleMainStream &stream);
+
+	// Receives the raw bytes of an ANSI piece.
+	virtual void ansiDataHandler(const char *buffer, std::size_t len) = 0;
+	// Receives one 16-bit unit of a non-ANSI piece.
+	virtual void ucs2SymbolHandler(ZLUnicodeUtil::Ucs2Char symbol) = 0;
+	// Called before the text of a footnote piece is read.
+	virtual void footnotesStartHandler() = 0;
+
+private:
+	// Index of the piece that the next readNextPiece() call will read.
+	std::size_t myNextPieceNumber;
+};
+
+#endif /* __OLESTREAMREADER_H__ */
diff --git a/fbreader/src/formats/doc/OleUtil.cpp b/fbreader/src/formats/doc/OleUtil.cpp
new file mode 100644
index 0000000..2e8f685
--- /dev/null
+++ b/fbreader/src/formats/doc/OleUtil.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include "OleUtil.h"
+
+// Decodes the four bytes at buffer[offset .. offset+3] as a little-endian
+// 32-bit value and returns it as a signed int.
+// No bounds checking is done: the caller must guarantee that offset+3 is a
+// valid index into buffer.
+int OleUtil::get4Bytes(const char *buffer, unsigned int offset) {
+	const unsigned char *buf = (const unsigned char*)buffer;
+	// Assemble the value in unsigned arithmetic: shifting a signed int so that
+	// a bit lands in the sign position (top byte >= 0x80) is not well defined
+	// before C++20, whereas unsigned shifts always are.
+	const unsigned int value =
+		(unsigned int)buf[offset]
+		| ((unsigned int)buf[offset+1] << 8)
+		| ((unsigned int)buf[offset+2] << 16)
+		| ((unsigned int)buf[offset+3] << 24);
+	return (int)value;
+}
+
+// Decodes the four bytes at buffer[offset .. offset+3] as an unsigned
+// little-endian 32-bit value (buffer[offset] is the least significant byte).
+// The caller is responsible for the four bytes being inside buffer.
+unsigned int OleUtil::getU4Bytes(const char *buffer, unsigned int offset) {
+	const unsigned char *bytes = reinterpret_cast<const unsigned char*>(buffer);
+	unsigned int value = 0;
+	// Walk from the most significant byte (offset+3) down to the least
+	// significant one (offset), shifting the accumulated value up each time.
+	for (unsigned int remaining = 4; remaining > 0; --remaining) {
+		value = (value << 8) | static_cast<unsigned int>(bytes[offset + remaining - 1]);
+	}
+	return value;
+}
+
+// Decodes the two bytes at buffer[offset] and buffer[offset+1] as an unsigned
+// little-endian 16-bit value (result is in 0..65535).
+// The caller is responsible for both bytes being inside buffer.
+unsigned int OleUtil::getU2Bytes(const char *buffer, unsigned int offset) {
+	const unsigned char *bytes = reinterpret_cast<const unsigned char*>(buffer);
+	const unsigned int lowByte = bytes[offset];
+	const unsigned int highByte = bytes[offset + 1];
+	return lowByte | (highByte << 8);
+}
+
+// Returns the byte at buffer[offset] as an unsigned value in 0..255.
+// The caller is responsible for offset being inside buffer.
+unsigned int OleUtil::getU1Byte(const char *buffer, unsigned int offset) {
+	const unsigned char byte = reinterpret_cast<const unsigned char*>(buffer)[offset];
+	return static_cast<unsigned int>(byte);
+}
+
+// Returns the byte at buffer[offset] as an int. The byte is read as an
+// unsigned char, so the result is always in 0..255 -- numerically the same
+// value getU1Byte() yields.
+// NOTE(review): the name suggests a signed byte (-128..127); confirm what
+// callers expect before changing the behaviour.
+int OleUtil::get1Byte(const char *buffer, unsigned int offset) {
+	const unsigned char byte = reinterpret_cast<const unsigned char*>(buffer)[offset];
+	return static_cast<int>(byte);
+}
+
+
+
diff --git a/fbreader/src/formats/doc/OleUtil.h b/fbreader/src/formats/doc/OleUtil.h
new file mode 100644
index 0000000..531c769
--- /dev/null
+++ b/fbreader/src/formats/doc/OleUtil.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __OLEUTIL_H__
+#define __OLEUTIL_H__
+
+// Stateless helpers that decode little-endian integers from a raw byte
+// buffer. Every function reads at buffer[offset] and onwards without any
+// bounds checking; the caller must make sure the bytes exist.
+class OleUtil {
+public:
+	// One byte. Both variants read the byte as unsigned (result 0..255).
+	static unsigned int getU1Byte(const char *buffer, unsigned int offset);
+	static int get1Byte(const char *buffer, unsigned int offset);
+
+	// Two bytes, least significant byte first.
+	static unsigned int getU2Bytes(const char *buffer, unsigned int offset);
+
+	// Four bytes, least significant byte first; unsigned and signed result.
+	static unsigned int getU4Bytes(const char *buffer, unsigned int offset);
+	static int get4Bytes(const char *buffer, unsigned int offset);
+};
+
+#endif /* __OLEUTIL_H__ */