summaryrefslogtreecommitdiffstats
path: root/reader/src/formats/pdf
diff options
context:
space:
mode:
authorMichele Calgaro <michele.calgaro@yahoo.it>2024-06-07 23:30:05 +0900
committerMichele Calgaro <michele.calgaro@yahoo.it>2024-06-07 23:30:05 +0900
commit17b259df9cb6b28779d4881b2b6c805ee2e48eea (patch)
tree5ed61937459cb7081089111b0242c01ec178f1f3 /reader/src/formats/pdf
parent1cba8bce178eb2d6719c6f7f21e2c9352c5513a6 (diff)
downloadtde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.tar.gz
tde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.zip
Rename to tde-ebook-reader
Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
Diffstat (limited to 'reader/src/formats/pdf')
-rw-r--r--reader/src/formats/pdf/PdfBookReader.cpp261
-rw-r--r--reader/src/formats/pdf/PdfBookReader.h52
-rw-r--r--reader/src/formats/pdf/PdfDescriptionReader.cpp29
-rw-r--r--reader/src/formats/pdf/PdfDescriptionReader.h40
-rw-r--r--reader/src/formats/pdf/PdfObject.cpp450
-rw-r--r--reader/src/formats/pdf/PdfObject.h201
-rw-r--r--reader/src/formats/pdf/PdfPlugin.cpp42
-rw-r--r--reader/src/formats/pdf/PdfPlugin.h41
-rw-r--r--reader/src/formats/pdf/StringStream.cpp55
-rw-r--r--reader/src/formats/pdf/StringStream.h44
10 files changed, 1215 insertions, 0 deletions
diff --git a/reader/src/formats/pdf/PdfBookReader.cpp b/reader/src/formats/pdf/PdfBookReader.cpp
new file mode 100644
index 0000000..bd84452
--- /dev/null
+++ b/reader/src/formats/pdf/PdfBookReader.cpp
@@ -0,0 +1,261 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+#include <iostream>
+
+#include <ZLStringUtil.h>
+#include <ZLInputStream.h>
+
+#include "PdfBookReader.h"
+#include "PdfObject.h"
+#include "../../bookmodel/BookModel.h"
+
+static void readLine(ZLInputStream &stream, std::string &buffer) {
+ buffer.clear();
+ char ch;
+ while (1) {
+ if (stream.read(&ch, 1) != 1) {
+ return;
+ }
+ if ((ch == 10) || (ch == 13)) {
+ if (!buffer.empty()) {
+ return;
+ }
+ } else {
+ buffer += ch;
+ }
+ }
+}
+
+PdfBookReader::PdfBookReader(BookModel &model) : myModelReader(model) {
+}
+
+PdfBookReader::~PdfBookReader() {
+}
+
+shared_ptr<PdfObject> PdfBookReader::readObjectFromLocation(ZLInputStream &stream, const std::pair<int,int> &address) {
+ std::map<std::pair<int,int>,int>::const_iterator jt = myObjectLocationMap.find(address);
+ if (jt == myObjectLocationMap.end()) {
+ return 0;
+ }
+ stream.seek(jt->second, true);
+ char ch = 0;
+ PdfObject::readToken(stream, myBuffer, ch);
+ if (address.first != atoi(myBuffer.c_str())) {
+ return 0;
+ }
+ PdfObject::readToken(stream, myBuffer, ch);
+ if (address.second != atoi(myBuffer.c_str())) {
+ return 0;
+ }
+ PdfObject::readToken(stream, myBuffer, ch);
+ if (myBuffer != "obj") {
+ return 0;
+ }
+ return PdfObject::readObject(stream, ch);
+}
+
+shared_ptr<PdfObject> PdfBookReader::resolveReference(shared_ptr<PdfObject> ref, ZLInputStream &stream) {
+ if (ref.isNull() || (ref->type() != PdfObject::REFERENCE)) {
+ return ref;
+ }
+ const PdfObjectReference &reference = (const PdfObjectReference&)*ref;
+ const std::pair<int,int> address(reference.number(), reference.generation());
+ std::map<std::pair<int,int>,shared_ptr<PdfObject> >::const_iterator it = myObjectMap.find(address);
+ if (it != myObjectMap.end()) {
+ return it->second;
+ }
+ std::map<std::pair<int,int>,int>::const_iterator jt = myObjectLocationMap.find(address);
+ shared_ptr<PdfObject> object = readObjectFromLocation(stream, address);
+ myObjectMap.insert(std::make_pair(address, object));
+ return object;
+}
+
+static void stripBuffer(std::string &buffer) {
+ int index = buffer.find('%');
+ if (index >= 0) {
+ buffer.erase(index);
+ }
+ ZLStringUtil::stripWhiteSpaces(buffer);
+}
+
+bool PdfBookReader::readReferenceTable(ZLInputStream &stream, int xrefOffset) {
+ while (true) {
+ stream.seek(xrefOffset, true);
+ readLine(stream, myBuffer);
+ stripBuffer(myBuffer);
+ if (myBuffer != "xref") {
+ return false;
+ }
+
+ while (true) {
+ readLine(stream, myBuffer);
+ stripBuffer(myBuffer);
+ if (myBuffer == "trailer") {
+ break;
+ }
+ const int index = myBuffer.find(' ');
+ const int start = atoi(myBuffer.c_str());
+ const int len = atoi(myBuffer.c_str() + index + 1);
+ for (int i = 0; i < len; ++i) {
+ readLine(stream, myBuffer);
+ stripBuffer(myBuffer);
+ if (myBuffer.length() != 18) {
+ return false;
+ }
+ const int objectOffset = atoi(myBuffer.c_str());
+ const int objectGeneration = atoi(myBuffer.c_str() + 11);
+ const bool objectInUse = myBuffer[17] == 'n';
+ if (objectInUse) {
+ myObjectLocationMap[std::make_pair(start + i, objectGeneration)] = objectOffset;
+ }
+ }
+ }
+ char ch = 0;
+ shared_ptr<PdfObject> trailer = PdfObject::readObject(stream, ch);
+ if (trailer.isNull() || (trailer->type() != PdfObject::DICTIONARY)) {
+ return false;
+ }
+ if (myTrailer.isNull()) {
+ myTrailer = trailer;
+ }
+ PdfDictionaryObject &trailerDictionary = (PdfDictionaryObject&)*trailer;
+ shared_ptr<PdfObject> previous = trailerDictionary["Prev"];
+ if (previous.isNull()) {
+ return true;
+ }
+
+ if (previous->type() != PdfObject::INTEGER_NUMBER) {
+ return false;
+ }
+ xrefOffset = ((PdfIntegerObject&)*previous).value();
+ }
+}
+
+bool PdfBookReader::readBook(shared_ptr<ZLInputStream> stream) {
+ if (stream.isNull() || !stream->open()) {
+ return false;
+ }
+
+ readLine(*stream, myBuffer);
+ if (!ZLStringUtil::stringStartsWith(myBuffer, "%PDF-")) {
+ return false;
+ }
+
+ std::string version = myBuffer.substr(5);
+ std::cerr << "version = " << version << "\n";
+
+ std::size_t eofOffset = stream->sizeOfOpened();
+ if (eofOffset < 100) {
+ return false;
+ }
+
+ stream->seek(eofOffset - 100, true);
+ bool readXrefOffset = false;
+ std::size_t xrefOffset = (std::size_t)-1;
+ while (true) {
+ readLine(*stream, myBuffer);
+ if (myBuffer.empty()) {
+ break;
+ }
+ stripBuffer(myBuffer);
+ if (readXrefOffset) {
+ if (!myBuffer.empty()) {
+ xrefOffset = atoi(myBuffer.c_str());
+ break;
+ }
+ } else if (myBuffer == "startxref") {
+ readXrefOffset = true;
+ }
+ }
+
+ if (!readReferenceTable(*stream, xrefOffset)) {
+ return false;
+ }
+
+ PdfDictionaryObject &trailerDictionary = (PdfDictionaryObject&)*myTrailer;
+ shared_ptr<PdfObject> root = resolveReference(trailerDictionary["Root"], *stream);
+ if (root.isNull() || (root->type() != PdfObject::DICTIONARY)) {
+ return false;
+ }
+
+ PdfDictionaryObject &rootDictionary = (PdfDictionaryObject&)*root;
+ if (rootDictionary["Type"] != PdfNameObject::nameObject("Catalog")) {
+ return false;
+ }
+ shared_ptr<PdfObject> pageRootNode = resolveReference(rootDictionary["Pages"], *stream);
+ if (pageRootNode.isNull() || (pageRootNode->type() != PdfObject::DICTIONARY)) {
+ return false;
+ }
+ PdfDictionaryObject &pageRootNodeDictionary = (PdfDictionaryObject&)*pageRootNode;
+ if (pageRootNodeDictionary["Type"] != PdfNameObject::nameObject("Pages")) {
+ return false;
+ }
+
+ /*
+ shared_ptr<PdfObject> count = pageRootNodeDictionary["Count"];
+ if (!count.isNull() && (count->type() == PdfObject::INTEGER_NUMBER)) {
+ std::cerr << "count = " << ((PdfIntegerObject&)*count).value() << "\n";
+ }
+ */
+ shared_ptr<PdfObject> pages = pageRootNodeDictionary["Kids"];
+ if (pages.isNull() || (pages->type() != PdfObject::ARRAY)) {
+ return false;
+ }
+ const PdfArrayObject& pagesArray = (const PdfArrayObject&)*pages;
+ const std::size_t pageNumber = pagesArray.size();
+ for (std::size_t i = 0; i < pageNumber; ++i) {
+ processPage(pagesArray[i], *stream);
+ }
+
+ return true;
+}
+
+void PdfBookReader::processContents(shared_ptr<PdfObject> contentsObject, ZLInputStream &stream) {
+ contentsObject = resolveReference(contentsObject, stream);
+}
+
+void PdfBookReader::processPage(shared_ptr<PdfObject> pageObject, ZLInputStream &stream) {
+ pageObject = resolveReference(pageObject, stream);
+ if (pageObject.isNull() || pageObject->type() != PdfObject::DICTIONARY) {
+ return;
+ }
+ const PdfDictionaryObject &pageDictionary = (const PdfDictionaryObject&)*pageObject;
+ shared_ptr<PdfObject> contents = pageDictionary["Contents"];
+ if (contents.isNull()) {
+ return;
+ }
+ switch (contents->type()) {
+ default:
+ break;
+ case PdfObject::REFERENCE:
+ processContents(contents, stream);
+ break;
+ case PdfObject::ARRAY:
+ {
+ const PdfArrayObject &array = (const PdfArrayObject&)*contents;
+ const std::size_t len = array.size();
+ for (std::size_t i = 0; i < len; ++i) {
+ processContents(array[i], stream);
+ }
+ break;
+ }
+ }
+}
diff --git a/reader/src/formats/pdf/PdfBookReader.h b/reader/src/formats/pdf/PdfBookReader.h
new file mode 100644
index 0000000..9488dcf
--- /dev/null
+++ b/reader/src/formats/pdf/PdfBookReader.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PdfBOOKREADER_H__
+#define __PdfBOOKREADER_H__
+
+#include <map>
+
+#include "../../bookmodel/BookReader.h"
+
+class PdfObject;
+class PdfObjectReference;
+
+class PdfBookReader {
+
+public:
+ PdfBookReader(BookModel &model);
+ ~PdfBookReader();
+ bool readBook(shared_ptr<ZLInputStream> stream);
+
+private:
+ bool readReferenceTable(ZLInputStream &stream, int offset);
+ shared_ptr<PdfObject> resolveReference(shared_ptr<PdfObject> reference, ZLInputStream &stream);
+ shared_ptr<PdfObject> readObjectFromLocation(ZLInputStream &stream, const std::pair<int,int> &address);
+ void processPage(shared_ptr<PdfObject> pageObject, ZLInputStream &stream);
+ void processContents(shared_ptr<PdfObject> contentsObject, ZLInputStream &stream);
+
+private:
+ BookReader myModelReader;
+ std::string myBuffer;
+ std::map<std::pair<int,int>,int> myObjectLocationMap;
+ std::map<std::pair<int,int>,shared_ptr<PdfObject> > myObjectMap;
+ shared_ptr<PdfObject> myTrailer;
+};
+
+#endif /* __PdfBOOKREADER_H__ */
diff --git a/reader/src/formats/pdf/PdfDescriptionReader.cpp b/reader/src/formats/pdf/PdfDescriptionReader.cpp
new file mode 100644
index 0000000..98937fa
--- /dev/null
+++ b/reader/src/formats/pdf/PdfDescriptionReader.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLInputStream.h>
+
+#include "PdfDescriptionReader.h"
+
+PdfDescriptionReader::PdfDescriptionReader(Book &book) : myBook(book) {
+}
+
+bool PdfDescriptionReader::readMetaInfo(shared_ptr<ZLInputStream> stream) {
+ return true;
+}
diff --git a/reader/src/formats/pdf/PdfDescriptionReader.h b/reader/src/formats/pdf/PdfDescriptionReader.h
new file mode 100644
index 0000000..004cdfa
--- /dev/null
+++ b/reader/src/formats/pdf/PdfDescriptionReader.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PDFDESCRIPTIONREADER_H__
+#define __PDFDESCRIPTIONREADER_H__
+
+#include <string>
+
+class Book;
+
+class PdfDescriptionReader {
+
+public:
+ PdfDescriptionReader(Book &book);
+ ~PdfDescriptionReader();
+ bool readMetaInfo(shared_ptr<ZLInputStream> stream);
+
+private:
+ Book &myBook;
+};
+
+inline PdfDescriptionReader::~PdfDescriptionReader() {}
+
+#endif /* __PDFDESCRIPTIONREADER_H__ */
diff --git a/reader/src/formats/pdf/PdfObject.cpp b/reader/src/formats/pdf/PdfObject.cpp
new file mode 100644
index 0000000..374a618
--- /dev/null
+++ b/reader/src/formats/pdf/PdfObject.cpp
@@ -0,0 +1,450 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <iostream>
+
+#include <ZLInputStream.h>
+#include <ZLZDecompressor.h>
+
+#include "PdfObject.h"
+
+PdfObject::~PdfObject() {
+}
+
+shared_ptr<PdfObject> PdfIntegerObject::integerObject(int value) {
+ if ((value < 0) || (value >= 256)) {
+ return new PdfIntegerObject(value);
+ } else {
+ static shared_ptr<PdfObject>* table = new shared_ptr<PdfObject>[256];
+ if (table[value].isNull()) {
+ table[value] = new PdfIntegerObject(value);
+ }
+ return table[value];
+ }
+}
+
+PdfIntegerObject::PdfIntegerObject(int value) : myValue(value) {
+ std::cerr << "PdfIntegerObject " << value << "\n";
+}
+
+int PdfIntegerObject::value() const {
+ return myValue;
+}
+
+PdfObject::Type PdfIntegerObject::type() const {
+ return INTEGER_NUMBER;
+}
+
+shared_ptr<PdfObject> PdfBooleanObject::TRUE() {
+ static shared_ptr<PdfObject> value = new PdfBooleanObject(true);
+ return value;
+}
+
+shared_ptr<PdfObject> PdfBooleanObject::FALSE() {
+ static shared_ptr<PdfObject> value = new PdfBooleanObject(false);
+ return value;
+}
+
+PdfBooleanObject::PdfBooleanObject(bool value) : myValue(value) {
+ std::cerr << "PdfBooleanObject " << value << "\n";
+}
+
+bool PdfBooleanObject::value() const {
+ return myValue;
+}
+
+PdfObject::Type PdfBooleanObject::type() const {
+ return BOOLEAN;
+}
+
+PdfStringObject::PdfStringObject(const std::string &value) : myValue(value) {
+ std::cerr << "PdfStringObject " << value << "\n";
+}
+
+PdfObject::Type PdfStringObject::type() const {
+ return STRING;
+}
+
+std::map<std::string,shared_ptr<PdfObject> > PdfNameObject::ourObjectMap;
+
+shared_ptr<PdfObject> PdfNameObject::nameObject(const std::string &id) {
+ // TODO: process escaped characters
+ std::map<std::string,shared_ptr<PdfObject> >::const_iterator it = ourObjectMap.find(id);
+ if (it != ourObjectMap.end()) {
+ return it->second;
+ }
+ std::cerr << "PdfNameObject " << id << "\n";
+ shared_ptr<PdfObject> object = new PdfNameObject();
+ ourObjectMap.insert(std::make_pair(id, object));
+ return object;
+}
+
+PdfNameObject::PdfNameObject() {
+}
+
+PdfObject::Type PdfNameObject::type() const {
+ return NAME;
+}
+
+PdfDictionaryObject::PdfDictionaryObject() {
+}
+
+void PdfDictionaryObject::setObject(shared_ptr<PdfObject> id, shared_ptr<PdfObject> object) {
+ myMap[id] = object;
+}
+
+shared_ptr<PdfObject> PdfDictionaryObject::operator[](shared_ptr<PdfObject> id) const {
+ std::map<shared_ptr<PdfObject>,shared_ptr<PdfObject> >::const_iterator it = myMap.find(id);
+ return (it != myMap.end()) ? it->second : 0;
+}
+
+shared_ptr<PdfObject> PdfDictionaryObject::operator[](const std::string &id) const {
+ return operator[](PdfNameObject::nameObject(id));
+}
+
+PdfObject::Type PdfDictionaryObject::type() const {
+ return DICTIONARY;
+}
+
+PdfArrayObject::PdfArrayObject() {
+}
+
+void PdfArrayObject::addObject(shared_ptr<PdfObject> object) {
+ myVector.push_back(object);
+}
+
+shared_ptr<PdfObject> PdfArrayObject::popLast() {
+ if (!myVector.empty()) {
+ shared_ptr<PdfObject> last = myVector.back();
+ myVector.pop_back();
+ return last;
+ }
+ return 0;
+}
+
+int PdfArrayObject::size() const {
+ return myVector.size();
+}
+
+shared_ptr<PdfObject> PdfArrayObject::operator[](int index) const {
+ return myVector[index];
+}
+
+PdfObject::Type PdfArrayObject::type() const {
+ return ARRAY;
+}
+
+PdfObjectReference::PdfObjectReference(int number, int generation) : myNumber(number), myGeneration(generation) {
+}
+
+int PdfObjectReference::number() const {
+ return myNumber;
+}
+
+int PdfObjectReference::generation() const {
+ return myGeneration;
+}
+
+PdfObject::Type PdfObjectReference::type() const {
+ return REFERENCE;
+}
+
+PdfStreamObject::PdfStreamObject(const PdfDictionaryObject &dictionary, ZLInputStream &dataStream) {
+ char ch;
+ skipWhiteSpaces(dataStream, ch);
+
+ shared_ptr<PdfObject> length = dictionary["Length"];
+ if (!length.isNull() && (length->type() == INTEGER_NUMBER)) {
+ int value = ((PdfIntegerObject&)*length).value();
+ if (value > 0) {
+ shared_ptr<PdfObject> filter = dictionary["Filter"];
+ if (filter == PdfNameObject::nameObject("FlateDecode")) {
+ dataStream.seek(1, false);
+ ZLZDecompressor decompressor(value - 2);
+ char buffer[2048];
+ while (true) {
+ std::size_t size = decompressor.decompress(dataStream, buffer, 2048);
+ if (size == 0) {
+ break;
+ }
+ myData.append(buffer, size);
+ }
+ std::cerr << myData << "\n";
+ } else {
+ myData.append(value, '\0');
+ myData[0] = ch;
+ dataStream.read((char*)myData.data() + 1, value - 1);
+ }
+ }
+ }
+
+ /*
+ shared_ptr<PdfObject> filter = dictionary["Filter"];
+ if (!filter.isNull()) {
+ switch (filter->type()) {
+ default:
+ break;
+ case NAME:
+ myFilters.push_back(
+ (filter == PdfNameObject::nameObject("FlateDecode")) ?
+ FLATE : UNKNOWN
+ );
+ break;
+ case ARRAY:
+ {
+ // TODO: process filters array
+ }
+ }
+ }
+ */
+}
+
+PdfObject::Type PdfStreamObject::type() const {
+ return STREAM;
+}
+
+enum PdfCharacterType {
+ PDF_CHAR_REGULAR,
+ PDF_CHAR_WHITESPACE,
+ PDF_CHAR_DELIMITER
+};
+
+static PdfCharacterType *PdfCharacterTypeTable = 0;
+
+void PdfObject::skipWhiteSpaces(ZLInputStream &stream, char &ch) {
+ if (PdfCharacterTypeTable == 0) {
+ PdfCharacterTypeTable = new PdfCharacterType[256];
+ for (int i = 0; i < 256; ++i) {
+ PdfCharacterTypeTable[i] = PDF_CHAR_REGULAR;
+ }
+ PdfCharacterTypeTable[0] = PDF_CHAR_WHITESPACE;
+ PdfCharacterTypeTable[9] = PDF_CHAR_WHITESPACE;
+ PdfCharacterTypeTable[10] = PDF_CHAR_WHITESPACE;
+ PdfCharacterTypeTable[12] = PDF_CHAR_WHITESPACE;
+ PdfCharacterTypeTable[13] = PDF_CHAR_WHITESPACE;
+ PdfCharacterTypeTable[32] = PDF_CHAR_WHITESPACE;
+ PdfCharacterTypeTable['('] = PDF_CHAR_DELIMITER;
+ PdfCharacterTypeTable[')'] = PDF_CHAR_DELIMITER;
+ PdfCharacterTypeTable['<'] = PDF_CHAR_DELIMITER;
+ PdfCharacterTypeTable['>'] = PDF_CHAR_DELIMITER;
+ PdfCharacterTypeTable['['] = PDF_CHAR_DELIMITER;
+ PdfCharacterTypeTable[']'] = PDF_CHAR_DELIMITER;
+ PdfCharacterTypeTable['{'] = PDF_CHAR_DELIMITER;
+ PdfCharacterTypeTable['}'] = PDF_CHAR_DELIMITER;
+ PdfCharacterTypeTable['/'] = PDF_CHAR_DELIMITER;
+ PdfCharacterTypeTable['%'] = PDF_CHAR_DELIMITER;
+ }
+
+ while ((PdfCharacterTypeTable[(unsigned char)ch] == PDF_CHAR_WHITESPACE) &&
+ (stream.read(&ch, 1) == 1)) {
+ }
+}
+
+void PdfObject::readToken(ZLInputStream &stream, std::string &buffer, char &ch) {
+ buffer.clear();
+ skipWhiteSpaces(stream, ch);
+ while (PdfCharacterTypeTable[(unsigned char)ch] == PDF_CHAR_REGULAR) {
+ buffer += ch;
+ if (stream.read(&ch, 1) != 1) {
+ break;
+ }
+ }
+}
+
+shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch) {
+ skipWhiteSpaces(stream, ch);
+
+ PdfObject::Type type = PdfObject::NIL;
+ bool hexString = false;
+ switch (ch) {
+ case '(':
+ hexString = false;
+ type = PdfObject::STRING;
+ break;
+ case '<':
+ stream.read(&ch, 1);
+ hexString = true;
+ type = (ch == '<') ? PdfObject::DICTIONARY : PdfObject::STRING;
+ break;
+ case '>': // end of dictionary
+ stream.read(&ch, 1);
+ if (ch == '>') {
+ stream.read(&ch, 1);
+ }
+ return 0;
+ case '/':
+ type = PdfObject::NAME;
+ break;
+ case '[':
+ type = PdfObject::ARRAY;
+ break;
+ case ']': // end of array
+ stream.read(&ch, 1);
+ return 0;
+ case '+':
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ type = PdfObject::INTEGER_NUMBER;
+ break;
+ case 't':
+ case 'f':
+ type = PdfObject::BOOLEAN;
+ break;
+ }
+
+ switch (type) {
+ case PdfObject::DICTIONARY:
+ {
+ ch = 0;
+ shared_ptr<PdfObject> name;
+ shared_ptr<PdfObject> value;
+ shared_ptr<PdfObject> next;
+ PdfDictionaryObject *dictionary = new PdfDictionaryObject();
+ while (true) {
+ next = readObject(stream, ch);
+ if (next.isNull()) {
+ break;
+ }
+ PdfObject::Type oType = next->type();
+ if (oType == PdfObject::NAME) {
+ name = next;
+ value = readObject(stream, ch);
+ if (value.isNull()) {
+ break;
+ }
+ dictionary->setObject(name, value);
+ } else if (oType == PdfObject::INTEGER_NUMBER) {
+ if (value.isNull() || (value->type() != PdfObject::INTEGER_NUMBER)) {
+ break;
+ }
+ skipWhiteSpaces(stream, ch);
+ if (ch != 'R') {
+ break;
+ }
+ const int number = ((PdfIntegerObject&)*value).value();
+ const int generation = ((PdfIntegerObject&)*next).value();
+ dictionary->setObject(name, new PdfObjectReference(number, generation));
+ value = 0;
+ ch = 0;
+ } else {
+ break;
+ }
+ }
+ std::string token;
+ readToken(stream, token, ch);
+ if (token == "stream") {
+ shared_ptr<PdfObject> d = dictionary;
+ return new PdfStreamObject(*dictionary, stream);
+ } else {
+ return dictionary;
+ }
+ }
+ case PdfObject::NAME:
+ {
+ std::string name;
+ stream.read(&ch, 1);
+ readToken(stream, name, ch);
+ return PdfNameObject::nameObject(name);
+ }
+ case PdfObject::BOOLEAN:
+ {
+ std::string name;
+ readToken(stream, name, ch);
+ return (name == "true") ? PdfBooleanObject::TRUE() : PdfBooleanObject::FALSE();
+ }
+ case PdfObject::INTEGER_NUMBER:
+ {
+ std::string str;
+ if ((ch == '+') || (ch == '-')) {
+ str += ch;
+ stream.read(&ch, 1);
+ }
+ while ((ch >= '0') && (ch <= '9')) {
+ str += ch;
+ stream.read(&ch, 1);
+ }
+ return PdfIntegerObject::integerObject(atoi(str.c_str()));
+ }
+ case PdfObject::STRING:
+ {
+ std::string value;
+ if (hexString) {
+ char num[3];
+ num[2] = '\0';
+ while (ch != '>') {
+ num[0] = ch;
+ stream.read(num + 1, 1);
+ value += (char)strtol(num, 0, 16);
+ stream.read(&ch, 1);
+ }
+ ch = 0;
+ } else {
+ // TODO: implement
+ }
+ return new PdfStringObject(value);
+ }
+ case PdfObject::ARRAY:
+ {
+ PdfArrayObject *array = new PdfArrayObject();
+ ch = 0;
+ while (true) {
+ skipWhiteSpaces(stream, ch);
+ if (ch == 'R') {
+ const int size = array->size();
+ if ((size >= 2) &&
+ ((*array)[size - 1]->type() == PdfObject::INTEGER_NUMBER) &&
+ ((*array)[size - 2]->type() == PdfObject::INTEGER_NUMBER)) {
+ const int generation = ((PdfIntegerObject&)*array->popLast()).value();
+ const int number = ((PdfIntegerObject&)*array->popLast()).value();
+ array->addObject(new PdfObjectReference(number, generation));
+ ch = 0;
+ }
+ }
+ shared_ptr<PdfObject> object = readObject(stream, ch);
+ if (object.isNull()) {
+ break;
+ }
+ array->addObject(object);
+ }
+ std::cerr << "PdfArrayObject " << array->size() << "\n";
+ return array;
+ }
+ default:
+ break;
+ }
+
+ std::string buffer;
+ stream.read(&ch, 1);
+ while (PdfCharacterTypeTable[(unsigned char)ch] == PDF_CHAR_REGULAR) {
+ buffer += ch;
+ stream.read(&ch, 1);
+ }
+ std::cerr << "buffer = " << buffer << "\n";
+
+ return 0;
+}
diff --git a/reader/src/formats/pdf/PdfObject.h b/reader/src/formats/pdf/PdfObject.h
new file mode 100644
index 0000000..76b8528
--- /dev/null
+++ b/reader/src/formats/pdf/PdfObject.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PDFOBJECT_H__
+#define __PDFOBJECT_H__
+
+#include <string>
+#include <vector>
+#include <map>
+
+#include <shared_ptr.h>
+
+class ZLInputStream;
+
+class PdfObject {
+
+public:
+ static shared_ptr<PdfObject> readObject(ZLInputStream &stream, char &ch);
+ static void readToken(ZLInputStream &stream, std::string &buffer, char &ch);
+
+protected:
+ static void skipWhiteSpaces(ZLInputStream &stream, char &ch);
+
+public:
+ enum Type {
+ BOOLEAN,
+ INTEGER_NUMBER,
+ REAL_NUMBER,
+ STRING,
+ NAME,
+ ARRAY,
+ DICTIONARY,
+ STREAM,
+ NIL,
+ REFERENCE
+ };
+
+ virtual ~PdfObject();
+
+ virtual Type type() const = 0;
+};
+
+class PdfBooleanObject : public PdfObject {
+
+public:
+ static shared_ptr<PdfObject> TRUE();
+ static shared_ptr<PdfObject> FALSE();
+
+private:
+ PdfBooleanObject(bool value);
+
+public:
+ bool value() const;
+
+private:
+ Type type() const;
+
+private:
+ const bool myValue;
+};
+
+class PdfIntegerObject : public PdfObject {
+
+public:
+ static shared_ptr<PdfObject> integerObject(int value);
+
+private:
+ PdfIntegerObject(int value);
+
+public:
+ int value() const;
+
+private:
+ Type type() const;
+
+private:
+ const int myValue;
+};
+
+class PdfStringObject : public PdfObject {
+
+private:
+ PdfStringObject(const std::string &value);
+
+private:
+ Type type() const;
+
+private:
+ std::string myValue;
+
+friend shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch);
+};
+
+class PdfNameObject : public PdfObject {
+
+public:
+ static shared_ptr<PdfObject> nameObject(const std::string &id);
+
+private:
+ static std::map<std::string,shared_ptr<PdfObject> > ourObjectMap;
+
+private:
+ PdfNameObject();
+
+private:
+ Type type() const;
+};
+
+class PdfDictionaryObject : public PdfObject {
+
+private:
+ PdfDictionaryObject();
+ void setObject(shared_ptr<PdfObject> id, shared_ptr<PdfObject> object);
+
+public:
+ shared_ptr<PdfObject> operator [] (shared_ptr<PdfObject> id) const;
+ shared_ptr<PdfObject> operator [] (const std::string &id) const;
+
+private:
+ Type type() const;
+
+private:
+ std::map<shared_ptr<PdfObject>,shared_ptr<PdfObject> > myMap;
+
+friend shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch);
+};
+
+class PdfStreamObject : public PdfObject {
+
+private:
+ PdfStreamObject(const PdfDictionaryObject &dictionary, ZLInputStream &dataStream);
+
+private:
+ Type type() const;
+
+private:
+ std::string myData;
+ /*
+ enum EncodingType {
+ UNKNOWN,
+ FLATE,
+ };
+ std::vector<EncodingType> myFilters;
+ */
+
+friend shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch);
+};
+
+class PdfArrayObject : public PdfObject {
+
+private:
+ PdfArrayObject();
+ void addObject(shared_ptr<PdfObject> object);
+ shared_ptr<PdfObject> popLast();
+
+public:
+ int size() const;
+ shared_ptr<PdfObject> operator [] (int index) const;
+
+private:
+ Type type() const;
+
+private:
+ std::vector<shared_ptr<PdfObject> > myVector;
+
+friend shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch);
+};
+
+class PdfObjectReference : public PdfObject {
+
+public:
+ PdfObjectReference(int number, int generation);
+
+ int number() const;
+ int generation() const;
+
+private:
+ Type type() const;
+
+private:
+ const int myNumber;
+ const int myGeneration;
+};
+
+#endif /* __PDFOBJECT_H__ */
diff --git a/reader/src/formats/pdf/PdfPlugin.cpp b/reader/src/formats/pdf/PdfPlugin.cpp
new file mode 100644
index 0000000..06325d4
--- /dev/null
+++ b/reader/src/formats/pdf/PdfPlugin.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <ZLFile.h>
+#include <ZLInputStream.h>
+
+#include "PdfPlugin.h"
+#include "PdfDescriptionReader.h"
+#include "PdfBookReader.h"
+#include "../../library/Book.h"
+
+bool PdfPlugin::acceptsFile(const ZLFile &file) const {
+ return file.extension() == "pdf";
+}
+
+bool PdfPlugin::readMetaInfo(Book &book) const {
+ return PdfDescriptionReader(book).readMetaInfo(ZLFile(path).inputStream());
+}
+
+bool PdfPlugin::readLanguageAndEncoding(Book &book) const {
+ return true;
+}
+
+bool PdfPlugin::readModel(BookModel &model) const {
+ return PdfBookReader(model).readBook(ZLFile(book.fileName()).inputStream());
+}
diff --git a/reader/src/formats/pdf/PdfPlugin.h b/reader/src/formats/pdf/PdfPlugin.h
new file mode 100644
index 0000000..9c330f6
--- /dev/null
+++ b/reader/src/formats/pdf/PdfPlugin.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __PdfPLUGIN_H__
+#define __PdfPLUGIN_H__
+
+#include "../FormatPlugin.h"
+
+class PdfPlugin : public FormatPlugin {
+
+public:
+ PdfPlugin();
+ ~PdfPlugin();
+ bool providesMetaInfo() const;
+ bool acceptsFile(const ZLFile &file) const;
+ bool readMetaInfo(Book &book) const;
+ bool readLanguageAndEncoding(Book &book) const;
+ bool readModel(BookModel &model) const;
+};
+
+inline PdfPlugin::PdfPlugin() {}
+inline PdfPlugin::~PdfPlugin() {}
+inline bool PdfPlugin::providesMetaInfo() const { return true; }
+
+#endif /* __PdfPLUGIN_H__ */
diff --git a/reader/src/formats/pdf/StringStream.cpp b/reader/src/formats/pdf/StringStream.cpp
new file mode 100644
index 0000000..b2369df
--- /dev/null
+++ b/reader/src/formats/pdf/StringStream.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <algorithm>
+
+#include "StringStream.h"
+
+StringStream::StringStream(const std::string &data) : myData(data), myOffset(0) {
+}
+
+bool StringStream::open() {
+ myOffset = 0;
+ return true;
+}
+
+std::size_t StringStream::read(char *buffer, std::size_t maxSize) {
+ std::size_t size = std::min(maxSize, myData.length() - myOffset);
+ memcpy(buffer, myData.data() + myOffset, size);
+ myOffset += size;
+ return size;
+}
+
+void StringStream::close() {
+}
+
+void StringStream::seek(int offset, bool absoluteOffset) {
+ if (!absoluteOffset) {
+ offset += myOffset;
+ }
+ myOffset = std::min((std::size_t)std::max(0, offset), myData.length());
+}
+
+std::size_t StringStream::offset() const {
+ return myOffset;
+}
+
+std::size_t StringStream::sizeOfOpened() {
+ return myData.length();
+}
diff --git a/reader/src/formats/pdf/StringStream.h b/reader/src/formats/pdf/StringStream.h
new file mode 100644
index 0000000..f46c038
--- /dev/null
+++ b/reader/src/formats/pdf/StringStream.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __STRINGSTREAM_H__
+#define __STRINGSTREAM_H__
+
+#include <ZLInputStream.h>
+
+class StringStream : public ZLInputStream {
+
+public:
+ StringStream(const std::string &data);
+
+public:
+ bool open();
+ std::size_t read(char *buffer, std::size_t maxSize);
+ void close();
+
+ void seek(int offset, bool absoluteOffset);
+ std::size_t offset() const;
+ std::size_t sizeOfOpened();
+
+private:
+ const std::string &myData;
+ std::size_t myOffset;
+};
+
+#endif /* __STRINGSTREAM_H__ */