diff options
Diffstat (limited to 'reader/src/formats/pdf')
-rw-r--r-- | reader/src/formats/pdf/PdfBookReader.cpp | 261 | ||||
-rw-r--r-- | reader/src/formats/pdf/PdfBookReader.h | 52 | ||||
-rw-r--r-- | reader/src/formats/pdf/PdfDescriptionReader.cpp | 29 | ||||
-rw-r--r-- | reader/src/formats/pdf/PdfDescriptionReader.h | 40 | ||||
-rw-r--r-- | reader/src/formats/pdf/PdfObject.cpp | 450 | ||||
-rw-r--r-- | reader/src/formats/pdf/PdfObject.h | 201 | ||||
-rw-r--r-- | reader/src/formats/pdf/PdfPlugin.cpp | 42 | ||||
-rw-r--r-- | reader/src/formats/pdf/PdfPlugin.h | 41 | ||||
-rw-r--r-- | reader/src/formats/pdf/StringStream.cpp | 55 | ||||
-rw-r--r-- | reader/src/formats/pdf/StringStream.h | 44 |
10 files changed, 1215 insertions, 0 deletions
diff --git a/reader/src/formats/pdf/PdfBookReader.cpp b/reader/src/formats/pdf/PdfBookReader.cpp new file mode 100644 index 0000000..bd84452 --- /dev/null +++ b/reader/src/formats/pdf/PdfBookReader.cpp @@ -0,0 +1,261 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <cstdlib> +#include <iostream> + +#include <ZLStringUtil.h> +#include <ZLInputStream.h> + +#include "PdfBookReader.h" +#include "PdfObject.h" +#include "../../bookmodel/BookModel.h" + +static void readLine(ZLInputStream &stream, std::string &buffer) { + buffer.clear(); + char ch; + while (1) { + if (stream.read(&ch, 1) != 1) { + return; + } + if ((ch == 10) || (ch == 13)) { + if (!buffer.empty()) { + return; + } + } else { + buffer += ch; + } + } +} + +PdfBookReader::PdfBookReader(BookModel &model) : myModelReader(model) { +} + +PdfBookReader::~PdfBookReader() { +} + +shared_ptr<PdfObject> PdfBookReader::readObjectFromLocation(ZLInputStream &stream, const std::pair<int,int> &address) { + std::map<std::pair<int,int>,int>::const_iterator jt = myObjectLocationMap.find(address); + if (jt == myObjectLocationMap.end()) { + return 0; + } + stream.seek(jt->second, true); + char ch = 0; + PdfObject::readToken(stream, myBuffer, ch); + if (address.first != atoi(myBuffer.c_str())) { + return 0; + } + PdfObject::readToken(stream, myBuffer, ch); + if (address.second != atoi(myBuffer.c_str())) { + return 0; + } + PdfObject::readToken(stream, myBuffer, ch); + if (myBuffer != "obj") { + return 0; + } + return PdfObject::readObject(stream, ch); +} + +shared_ptr<PdfObject> PdfBookReader::resolveReference(shared_ptr<PdfObject> ref, ZLInputStream &stream) { + if (ref.isNull() || (ref->type() != PdfObject::REFERENCE)) { + return ref; + } + const PdfObjectReference &reference = (const PdfObjectReference&)*ref; + const std::pair<int,int> address(reference.number(), reference.generation()); + std::map<std::pair<int,int>,shared_ptr<PdfObject> >::const_iterator it = myObjectMap.find(address); + if (it != myObjectMap.end()) { + return it->second; + } + std::map<std::pair<int,int>,int>::const_iterator jt = myObjectLocationMap.find(address); + shared_ptr<PdfObject> object = readObjectFromLocation(stream, address); + myObjectMap.insert(std::make_pair(address, object)); + return object; +} + +static void stripBuffer(std::string &buffer) { + int index = buffer.find('%'); + if (index >= 0) { + buffer.erase(index); + } + ZLStringUtil::stripWhiteSpaces(buffer); +} + +bool PdfBookReader::readReferenceTable(ZLInputStream &stream, int xrefOffset) { + while (true) { + stream.seek(xrefOffset, true); + readLine(stream, myBuffer); + stripBuffer(myBuffer); + if (myBuffer != "xref") { + return false; + } + + while (true) { + readLine(stream, myBuffer); + stripBuffer(myBuffer); + if (myBuffer == "trailer") { + break; + } + const int index = myBuffer.find(' '); + const int start = atoi(myBuffer.c_str()); + const int len = atoi(myBuffer.c_str() + index + 1); + for (int i = 0; i < len; ++i) { + readLine(stream, myBuffer); + stripBuffer(myBuffer); + if (myBuffer.length() != 18) { + return false; + } + const int objectOffset = atoi(myBuffer.c_str()); + const int objectGeneration = atoi(myBuffer.c_str() + 11); + const bool objectInUse = myBuffer[17] == 'n'; + if (objectInUse) { + myObjectLocationMap[std::make_pair(start + i, objectGeneration)] = objectOffset; + } + } + } + char ch = 0; + shared_ptr<PdfObject> trailer = PdfObject::readObject(stream, ch); + if (trailer.isNull() || (trailer->type() != PdfObject::DICTIONARY)) { + return false; + } + if (myTrailer.isNull()) { + myTrailer = trailer; + } + PdfDictionaryObject &trailerDictionary = (PdfDictionaryObject&)*trailer; + shared_ptr<PdfObject> previous = trailerDictionary["Prev"]; + if (previous.isNull()) { + return true; + } + + if (previous->type() != PdfObject::INTEGER_NUMBER) { + return false; + } + xrefOffset = ((PdfIntegerObject&)*previous).value(); + } +} + +bool PdfBookReader::readBook(shared_ptr<ZLInputStream> stream) { + if (stream.isNull() || !stream->open()) { + return false; + } + + readLine(*stream, myBuffer); + if (!ZLStringUtil::stringStartsWith(myBuffer, "%PDF-")) { + return false; + } + + std::string version = myBuffer.substr(5); + std::cerr << "version = " << version << "\n"; + + std::size_t eofOffset = stream->sizeOfOpened(); + if (eofOffset < 100) { + return false; + } + + stream->seek(eofOffset - 100, true); + bool readXrefOffset = false; + std::size_t xrefOffset = (std::size_t)-1; + while (true) { + readLine(*stream, myBuffer); + if (myBuffer.empty()) { + break; + } + stripBuffer(myBuffer); + if (readXrefOffset) { + if (!myBuffer.empty()) { + xrefOffset = atoi(myBuffer.c_str()); + break; + } + } else if (myBuffer == "startxref") { + readXrefOffset = true; + } + } + + if (!readReferenceTable(*stream, xrefOffset)) { + return false; + } + + PdfDictionaryObject &trailerDictionary = (PdfDictionaryObject&)*myTrailer; + shared_ptr<PdfObject> root = resolveReference(trailerDictionary["Root"], *stream); + if (root.isNull() || (root->type() != PdfObject::DICTIONARY)) { + return false; + } + + PdfDictionaryObject &rootDictionary = (PdfDictionaryObject&)*root; + if (rootDictionary["Type"] != PdfNameObject::nameObject("Catalog")) { + return false; + } + shared_ptr<PdfObject> pageRootNode = resolveReference(rootDictionary["Pages"], *stream); + if (pageRootNode.isNull() || (pageRootNode->type() != PdfObject::DICTIONARY)) { + return false; + } + PdfDictionaryObject &pageRootNodeDictionary = (PdfDictionaryObject&)*pageRootNode; + if (pageRootNodeDictionary["Type"] != PdfNameObject::nameObject("Pages")) { + return false; + } + + /* + shared_ptr<PdfObject> count = pageRootNodeDictionary["Count"]; + if (!count.isNull() && (count->type() == PdfObject::INTEGER_NUMBER)) { + std::cerr << "count = " << ((PdfIntegerObject&)*count).value() << "\n"; + } + */ + shared_ptr<PdfObject> pages = pageRootNodeDictionary["Kids"]; + if (pages.isNull() || (pages->type() != PdfObject::ARRAY)) { + return false; + } + const PdfArrayObject& pagesArray = (const PdfArrayObject&)*pages; + const std::size_t pageNumber = pagesArray.size(); + for (std::size_t i = 0; i < pageNumber; ++i) { + processPage(pagesArray[i], *stream); + } + + return true; +} + +void PdfBookReader::processContents(shared_ptr<PdfObject> contentsObject, ZLInputStream &stream) { + contentsObject = resolveReference(contentsObject, stream); +} + +void PdfBookReader::processPage(shared_ptr<PdfObject> pageObject, ZLInputStream &stream) { + pageObject = resolveReference(pageObject, stream); + if (pageObject.isNull() || pageObject->type() != PdfObject::DICTIONARY) { + return; + } + const PdfDictionaryObject &pageDictionary = (const PdfDictionaryObject&)*pageObject; + shared_ptr<PdfObject> contents = pageDictionary["Contents"]; + if (contents.isNull()) { + return; + } + switch (contents->type()) { + default: + break; + case PdfObject::REFERENCE: + processContents(contents, stream); + break; + case PdfObject::ARRAY: + { + const PdfArrayObject &array = (const PdfArrayObject&)*contents; + const std::size_t len = array.size(); + for (std::size_t i = 0; i < len; ++i) { + processContents(array[i], stream); + } + break; + } + } +} diff --git a/reader/src/formats/pdf/PdfBookReader.h b/reader/src/formats/pdf/PdfBookReader.h new file mode 100644 index 0000000..9488dcf --- /dev/null +++ b/reader/src/formats/pdf/PdfBookReader.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __PdfBOOKREADER_H__ +#define __PdfBOOKREADER_H__ + +#include <map> + +#include "../../bookmodel/BookReader.h" + +class PdfObject; +class PdfObjectReference; + +class PdfBookReader { + +public: + PdfBookReader(BookModel &model); + ~PdfBookReader(); + bool readBook(shared_ptr<ZLInputStream> stream); + +private: + bool readReferenceTable(ZLInputStream &stream, int offset); + shared_ptr<PdfObject> resolveReference(shared_ptr<PdfObject> reference, ZLInputStream &stream); + shared_ptr<PdfObject> readObjectFromLocation(ZLInputStream &stream, const std::pair<int,int> &address); + void processPage(shared_ptr<PdfObject> pageObject, ZLInputStream &stream); + void processContents(shared_ptr<PdfObject> contentsObject, ZLInputStream &stream); + +private: + BookReader myModelReader; + std::string myBuffer; + std::map<std::pair<int,int>,int> myObjectLocationMap; + std::map<std::pair<int,int>,shared_ptr<PdfObject> > myObjectMap; + shared_ptr<PdfObject> myTrailer; +}; + +#endif /* __PdfBOOKREADER_H__ */ diff --git a/reader/src/formats/pdf/PdfDescriptionReader.cpp b/reader/src/formats/pdf/PdfDescriptionReader.cpp new file mode 100644 index 0000000..98937fa --- /dev/null +++ b/reader/src/formats/pdf/PdfDescriptionReader.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <ZLInputStream.h> + +#include "PdfDescriptionReader.h" + +PdfDescriptionReader::PdfDescriptionReader(Book &book) : myBook(book) { +} + +bool PdfDescriptionReader::readMetaInfo(shared_ptr<ZLInputStream> stream) { + return true; +} diff --git a/reader/src/formats/pdf/PdfDescriptionReader.h b/reader/src/formats/pdf/PdfDescriptionReader.h new file mode 100644 index 0000000..004cdfa --- /dev/null +++ b/reader/src/formats/pdf/PdfDescriptionReader.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __PDFDESCRIPTIONREADER_H__ +#define __PDFDESCRIPTIONREADER_H__ + +#include <string> + +class Book; + +class PdfDescriptionReader { + +public: + PdfDescriptionReader(Book &book); + ~PdfDescriptionReader(); + bool readMetaInfo(shared_ptr<ZLInputStream> stream); + +private: + Book &myBook; +}; + +inline PdfDescriptionReader::~PdfDescriptionReader() {} + +#endif /* __PDFDESCRIPTIONREADER_H__ */ diff --git a/reader/src/formats/pdf/PdfObject.cpp b/reader/src/formats/pdf/PdfObject.cpp new file mode 100644 index 0000000..374a618 --- /dev/null +++ b/reader/src/formats/pdf/PdfObject.cpp @@ -0,0 +1,450 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <iostream> + +#include <ZLInputStream.h> +#include <ZLZDecompressor.h> + +#include "PdfObject.h" + +PdfObject::~PdfObject() { +} + +shared_ptr<PdfObject> PdfIntegerObject::integerObject(int value) { + if ((value < 0) || (value >= 256)) { + return new PdfIntegerObject(value); + } else { + static shared_ptr<PdfObject>* table = new shared_ptr<PdfObject>[256]; + if (table[value].isNull()) { + table[value] = new PdfIntegerObject(value); + } + return table[value]; + } +} + +PdfIntegerObject::PdfIntegerObject(int value) : myValue(value) { + std::cerr << "PdfIntegerObject " << value << "\n"; +} + +int PdfIntegerObject::value() const { + return myValue; +} + +PdfObject::Type PdfIntegerObject::type() const { + return INTEGER_NUMBER; +} + +shared_ptr<PdfObject> PdfBooleanObject::TRUE() { + static shared_ptr<PdfObject> value = new PdfBooleanObject(true); + return value; +} + +shared_ptr<PdfObject> PdfBooleanObject::FALSE() { + static shared_ptr<PdfObject> value = new PdfBooleanObject(false); + return value; +} + +PdfBooleanObject::PdfBooleanObject(bool value) : myValue(value) { + std::cerr << "PdfBooleanObject " << value << "\n"; +} + +bool PdfBooleanObject::value() const { + return myValue; +} + +PdfObject::Type PdfBooleanObject::type() const { + return BOOLEAN; +} + +PdfStringObject::PdfStringObject(const std::string &value) : myValue(value) { + std::cerr << "PdfStringObject " << value << "\n"; +} + +PdfObject::Type PdfStringObject::type() const { + return STRING; +} + +std::map<std::string,shared_ptr<PdfObject> > PdfNameObject::ourObjectMap; + +shared_ptr<PdfObject> PdfNameObject::nameObject(const std::string &id) { + // TODO: process escaped characters + std::map<std::string,shared_ptr<PdfObject> >::const_iterator it = ourObjectMap.find(id); + if (it != ourObjectMap.end()) { + return it->second; + } + std::cerr << "PdfNameObject " << id << "\n"; + shared_ptr<PdfObject> object = new PdfNameObject(); + ourObjectMap.insert(std::make_pair(id, object)); + return object; +} + +PdfNameObject::PdfNameObject() { +} + +PdfObject::Type PdfNameObject::type() const { + return NAME; +} + +PdfDictionaryObject::PdfDictionaryObject() { +} + +void PdfDictionaryObject::setObject(shared_ptr<PdfObject> id, shared_ptr<PdfObject> object) { + myMap[id] = object; +} + +shared_ptr<PdfObject> PdfDictionaryObject::operator[](shared_ptr<PdfObject> id) const { + std::map<shared_ptr<PdfObject>,shared_ptr<PdfObject> >::const_iterator it = myMap.find(id); + return (it != myMap.end()) ? it->second : 0; +} + +shared_ptr<PdfObject> PdfDictionaryObject::operator[](const std::string &id) const { + return operator[](PdfNameObject::nameObject(id)); +} + +PdfObject::Type PdfDictionaryObject::type() const { + return DICTIONARY; +} + +PdfArrayObject::PdfArrayObject() { +} + +void PdfArrayObject::addObject(shared_ptr<PdfObject> object) { + myVector.push_back(object); +} + +shared_ptr<PdfObject> PdfArrayObject::popLast() { + if (!myVector.empty()) { + shared_ptr<PdfObject> last = myVector.back(); + myVector.pop_back(); + return last; + } + return 0; +} + +int PdfArrayObject::size() const { + return myVector.size(); +} + +shared_ptr<PdfObject> PdfArrayObject::operator[](int index) const { + return myVector[index]; +} + +PdfObject::Type PdfArrayObject::type() const { + return ARRAY; +} + +PdfObjectReference::PdfObjectReference(int number, int generation) : myNumber(number), myGeneration(generation) { +} + +int PdfObjectReference::number() const { + return myNumber; +} + +int PdfObjectReference::generation() const { + return myGeneration; +} + +PdfObject::Type PdfObjectReference::type() const { + return REFERENCE; +} + +PdfStreamObject::PdfStreamObject(const PdfDictionaryObject &dictionary, ZLInputStream &dataStream) { + char ch; + skipWhiteSpaces(dataStream, ch); + + shared_ptr<PdfObject> length = dictionary["Length"]; + if (!length.isNull() && (length->type() == INTEGER_NUMBER)) { + int value = ((PdfIntegerObject&)*length).value(); + if (value > 0) { + shared_ptr<PdfObject> filter = dictionary["Filter"]; + if (filter == PdfNameObject::nameObject("FlateDecode")) { + dataStream.seek(1, false); + ZLZDecompressor decompressor(value - 2); + char buffer[2048]; + while (true) { + std::size_t size = decompressor.decompress(dataStream, buffer, 2048); + if (size == 0) { + break; + } + myData.append(buffer, size); + } + std::cerr << myData << "\n"; + } else { + myData.append(value, '\0'); + myData[0] = ch; + dataStream.read((char*)myData.data() + 1, value - 1); + } + } + } + + /* + shared_ptr<PdfObject> filter = dictionary["Filter"]; + if (!filter.isNull()) { + switch (filter->type()) { + default: + break; + case NAME: + myFilters.push_back( + (filter == PdfNameObject::nameObject("FlateDecode")) ? + FLATE : UNKNOWN + ); + break; + case ARRAY: + { + // TODO: process filters array + } + } + } + */ +} + +PdfObject::Type PdfStreamObject::type() const { + return STREAM; +} + +enum PdfCharacterType { + PDF_CHAR_REGULAR, + PDF_CHAR_WHITESPACE, + PDF_CHAR_DELIMITER +}; + +static PdfCharacterType *PdfCharacterTypeTable = 0; + +void PdfObject::skipWhiteSpaces(ZLInputStream &stream, char &ch) { + if (PdfCharacterTypeTable == 0) { + PdfCharacterTypeTable = new PdfCharacterType[256]; + for (int i = 0; i < 256; ++i) { + PdfCharacterTypeTable[i] = PDF_CHAR_REGULAR; + } + PdfCharacterTypeTable[0] = PDF_CHAR_WHITESPACE; + PdfCharacterTypeTable[9] = PDF_CHAR_WHITESPACE; + PdfCharacterTypeTable[10] = PDF_CHAR_WHITESPACE; + PdfCharacterTypeTable[12] = PDF_CHAR_WHITESPACE; + PdfCharacterTypeTable[13] = PDF_CHAR_WHITESPACE; + PdfCharacterTypeTable[32] = PDF_CHAR_WHITESPACE; + PdfCharacterTypeTable['('] = PDF_CHAR_DELIMITER; + PdfCharacterTypeTable[')'] = PDF_CHAR_DELIMITER; + PdfCharacterTypeTable['<'] = PDF_CHAR_DELIMITER; + PdfCharacterTypeTable['>'] = PDF_CHAR_DELIMITER; + PdfCharacterTypeTable['['] = PDF_CHAR_DELIMITER; + PdfCharacterTypeTable[']'] = PDF_CHAR_DELIMITER; + PdfCharacterTypeTable['{'] = PDF_CHAR_DELIMITER; + PdfCharacterTypeTable['}'] = PDF_CHAR_DELIMITER; + PdfCharacterTypeTable['/'] = PDF_CHAR_DELIMITER; + PdfCharacterTypeTable['%'] = PDF_CHAR_DELIMITER; + } + + while ((PdfCharacterTypeTable[(unsigned char)ch] == PDF_CHAR_WHITESPACE) && + (stream.read(&ch, 1) == 1)) { + } +} + +void PdfObject::readToken(ZLInputStream &stream, std::string &buffer, char &ch) { + buffer.clear(); + skipWhiteSpaces(stream, ch); + while (PdfCharacterTypeTable[(unsigned char)ch] == PDF_CHAR_REGULAR) { + buffer += ch; + if (stream.read(&ch, 1) != 1) { + break; + } + } +} + +shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch) { + skipWhiteSpaces(stream, ch); + + PdfObject::Type type = PdfObject::NIL; + bool hexString = false; + switch (ch) { + case '(': + hexString = false; + type = PdfObject::STRING; + break; + case '<': + stream.read(&ch, 1); + hexString = true; + type = (ch == '<') ? PdfObject::DICTIONARY : PdfObject::STRING; + break; + case '>': // end of dictionary + stream.read(&ch, 1); + if (ch == '>') { + stream.read(&ch, 1); + } + return 0; + case '/': + type = PdfObject::NAME; + break; + case '[': + type = PdfObject::ARRAY; + break; + case ']': // end of array + stream.read(&ch, 1); + return 0; + case '+': + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + type = PdfObject::INTEGER_NUMBER; + break; + case 't': + case 'f': + type = PdfObject::BOOLEAN; + break; + } + + switch (type) { + case PdfObject::DICTIONARY: + { + ch = 0; + shared_ptr<PdfObject> name; + shared_ptr<PdfObject> value; + shared_ptr<PdfObject> next; + PdfDictionaryObject *dictionary = new PdfDictionaryObject(); + while (true) { + next = readObject(stream, ch); + if (next.isNull()) { + break; + } + PdfObject::Type oType = next->type(); + if (oType == PdfObject::NAME) { + name = next; + value = readObject(stream, ch); + if (value.isNull()) { + break; + } + dictionary->setObject(name, value); + } else if (oType == PdfObject::INTEGER_NUMBER) { + if (value.isNull() || (value->type() != PdfObject::INTEGER_NUMBER)) { + break; + } + skipWhiteSpaces(stream, ch); + if (ch != 'R') { + break; + } + const int number = ((PdfIntegerObject&)*value).value(); + const int generation = ((PdfIntegerObject&)*next).value(); + dictionary->setObject(name, new PdfObjectReference(number, generation)); + value = 0; + ch = 0; + } else { + break; + } + } + std::string token; + readToken(stream, token, ch); + if (token == "stream") { + shared_ptr<PdfObject> d = dictionary; + return new PdfStreamObject(*dictionary, stream); + } else { + return dictionary; + } + } + case PdfObject::NAME: + { + std::string name; + stream.read(&ch, 1); + readToken(stream, name, ch); + return PdfNameObject::nameObject(name); + } + case PdfObject::BOOLEAN: + { + std::string name; + readToken(stream, name, ch); + return (name == "true") ? PdfBooleanObject::TRUE() : PdfBooleanObject::FALSE(); + } + case PdfObject::INTEGER_NUMBER: + { + std::string str; + if ((ch == '+') || (ch == '-')) { + str += ch; + stream.read(&ch, 1); + } + while ((ch >= '0') && (ch <= '9')) { + str += ch; + stream.read(&ch, 1); + } + return PdfIntegerObject::integerObject(atoi(str.c_str())); + } + case PdfObject::STRING: + { + std::string value; + if (hexString) { + char num[3]; + num[2] = '\0'; + while (ch != '>') { + num[0] = ch; + stream.read(num + 1, 1); + value += (char)strtol(num, 0, 16); + stream.read(&ch, 1); + } + ch = 0; + } else { + // TODO: implement + } + return new PdfStringObject(value); + } + case PdfObject::ARRAY: + { + PdfArrayObject *array = new PdfArrayObject(); + ch = 0; + while (true) { + skipWhiteSpaces(stream, ch); + if (ch == 'R') { + const int size = array->size(); + if ((size >= 2) && + ((*array)[size - 1]->type() == PdfObject::INTEGER_NUMBER) && + ((*array)[size - 2]->type() == PdfObject::INTEGER_NUMBER)) { + const int generation = ((PdfIntegerObject&)*array->popLast()).value(); + const int number = ((PdfIntegerObject&)*array->popLast()).value(); + array->addObject(new PdfObjectReference(number, generation)); + ch = 0; + } + } + shared_ptr<PdfObject> object = readObject(stream, ch); + if (object.isNull()) { + break; + } + array->addObject(object); + } + std::cerr << "PdfArrayObject " << array->size() << "\n"; + return array; + } + default: + break; + } + + std::string buffer; + stream.read(&ch, 1); + while (PdfCharacterTypeTable[(unsigned char)ch] == PDF_CHAR_REGULAR) { + buffer += ch; + stream.read(&ch, 1); + } + std::cerr << "buffer = " << buffer << "\n"; + + return 0; +} diff --git a/reader/src/formats/pdf/PdfObject.h b/reader/src/formats/pdf/PdfObject.h new file mode 100644 index 0000000..76b8528 --- /dev/null +++ b/reader/src/formats/pdf/PdfObject.h @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __PDFOBJECT_H__ +#define __PDFOBJECT_H__ + +#include <string> +#include <vector> +#include <map> + +#include <shared_ptr.h> + +class ZLInputStream; + +class PdfObject { + +public: + static shared_ptr<PdfObject> readObject(ZLInputStream &stream, char &ch); + static void readToken(ZLInputStream &stream, std::string &buffer, char &ch); + +protected: + static void skipWhiteSpaces(ZLInputStream &stream, char &ch); + +public: + enum Type { + BOOLEAN, + INTEGER_NUMBER, + REAL_NUMBER, + STRING, + NAME, + ARRAY, + DICTIONARY, + STREAM, + NIL, + REFERENCE + }; + + virtual ~PdfObject(); + + virtual Type type() const = 0; +}; + +class PdfBooleanObject : public PdfObject { + +public: + static shared_ptr<PdfObject> TRUE(); + static shared_ptr<PdfObject> FALSE(); + +private: + PdfBooleanObject(bool value); + +public: + bool value() const; + +private: + Type type() const; + +private: + const bool myValue; +}; + +class PdfIntegerObject : public PdfObject { + +public: + static shared_ptr<PdfObject> integerObject(int value); + +private: + PdfIntegerObject(int value); + +public: + int value() const; + +private: + Type type() const; + +private: + const int myValue; +}; + +class PdfStringObject : public PdfObject { + +private: + PdfStringObject(const std::string &value); + +private: + Type type() const; + +private: + std::string myValue; + +friend shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch); +}; + +class PdfNameObject : public PdfObject { + +public: + static shared_ptr<PdfObject> nameObject(const std::string &id); + +private: + static std::map<std::string,shared_ptr<PdfObject> > ourObjectMap; + +private: + PdfNameObject(); + +private: + Type type() const; +}; + +class PdfDictionaryObject : public PdfObject { + +private: + PdfDictionaryObject(); + void setObject(shared_ptr<PdfObject> id, shared_ptr<PdfObject> object); + +public: + shared_ptr<PdfObject> operator [] (shared_ptr<PdfObject> id) const; + shared_ptr<PdfObject> operator [] (const std::string &id) const; + +private: + Type type() const; + +private: + std::map<shared_ptr<PdfObject>,shared_ptr<PdfObject> > myMap; + +friend shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch); +}; + +class PdfStreamObject : public PdfObject { + +private: + PdfStreamObject(const PdfDictionaryObject &dictionary, ZLInputStream &dataStream); + +private: + Type type() const; + +private: + std::string myData; + /* + enum EncodingType { + UNKNOWN, + FLATE, + }; + std::vector<EncodingType> myFilters; + */ + +friend shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch); +}; + +class PdfArrayObject : public PdfObject { + +private: + PdfArrayObject(); + void addObject(shared_ptr<PdfObject> object); + shared_ptr<PdfObject> popLast(); + +public: + int size() const; + shared_ptr<PdfObject> operator [] (int index) const; + +private: + Type type() const; + +private: + std::vector<shared_ptr<PdfObject> > myVector; + +friend shared_ptr<PdfObject> PdfObject::readObject(ZLInputStream &stream, char &ch); +}; + +class PdfObjectReference : public PdfObject { + +public: + PdfObjectReference(int number, int generation); + + int number() const; + int generation() const; + +private: + Type type() const; + +private: + const int myNumber; + const int myGeneration; +}; + +#endif /* __PDFOBJECT_H__ */ diff --git a/reader/src/formats/pdf/PdfPlugin.cpp b/reader/src/formats/pdf/PdfPlugin.cpp new file mode 100644 index 0000000..06325d4 --- /dev/null +++ b/reader/src/formats/pdf/PdfPlugin.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <ZLFile.h> +#include <ZLInputStream.h> + +#include "PdfPlugin.h" +#include "PdfDescriptionReader.h" +#include "PdfBookReader.h" +#include "../../library/Book.h" + +bool PdfPlugin::acceptsFile(const ZLFile &file) const { + return file.extension() == "pdf"; +} + +bool PdfPlugin::readMetaInfo(Book &book) const { + return PdfDescriptionReader(book).readMetaInfo(ZLFile(path).inputStream()); +} + +bool PdfPlugin::readLanguageAndEncoding(Book &book) const { + return true; +} + +bool PdfPlugin::readModel(BookModel &model) const { + return PdfBookReader(model).readBook(ZLFile(book.fileName()).inputStream()); +} diff --git a/reader/src/formats/pdf/PdfPlugin.h b/reader/src/formats/pdf/PdfPlugin.h new file mode 100644 index 0000000..9c330f6 --- /dev/null +++ b/reader/src/formats/pdf/PdfPlugin.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __PdfPLUGIN_H__ +#define __PdfPLUGIN_H__ + +#include "../FormatPlugin.h" + +class PdfPlugin : public FormatPlugin { + +public: + PdfPlugin(); + ~PdfPlugin(); + bool providesMetaInfo() const; + bool acceptsFile(const ZLFile &file) const; + bool readMetaInfo(Book &book) const; + bool readLanguageAndEncoding(Book &book) const; + bool readModel(BookModel &model) const; +}; + +inline PdfPlugin::PdfPlugin() {} +inline PdfPlugin::~PdfPlugin() {} +inline bool PdfPlugin::providesMetaInfo() const { return true; } + +#endif /* __PdfPLUGIN_H__ */ diff --git a/reader/src/formats/pdf/StringStream.cpp b/reader/src/formats/pdf/StringStream.cpp new file mode 100644 index 0000000..b2369df --- /dev/null +++ b/reader/src/formats/pdf/StringStream.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2008-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <algorithm> + +#include "StringStream.h" + +StringStream::StringStream(const std::string &data) : myData(data), myOffset(0) { +} + +bool StringStream::open() { + myOffset = 0; + return true; +} + +std::size_t StringStream::read(char *buffer, std::size_t maxSize) { + std::size_t size = std::min(maxSize, myData.length() - myOffset); + memcpy(buffer, myData.data() + myOffset, size); + myOffset += size; + return size; +} + +void StringStream::close() { +} + +void StringStream::seek(int offset, bool absoluteOffset) { + if (!absoluteOffset) { + offset += myOffset; + } + myOffset = std::min((std::size_t)std::max(0, offset), myData.length()); +} + +std::size_t StringStream::offset() const { + return myOffset; +} + +std::size_t StringStream::sizeOfOpened() { + return myData.length(); +} diff --git a/reader/src/formats/pdf/StringStream.h b/reader/src/formats/pdf/StringStream.h new file mode 100644 index 0000000..f46c038 --- /dev/null +++ b/reader/src/formats/pdf/StringStream.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2008-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __STRINGSTREAM_H__ +#define __STRINGSTREAM_H__ + +#include <ZLInputStream.h> + +class StringStream : public ZLInputStream { + +public: + StringStream(const std::string &data); + +public: + bool open(); + std::size_t read(char *buffer, std::size_t maxSize); + void close(); + + void seek(int offset, bool absoluteOffset); + std::size_t offset() const; + std::size_t sizeOfOpened(); + +private: + const std::string &myData; + std::size_t myOffset; +}; + +#endif /* __STRINGSTREAM_H__ */ |