diff options
Diffstat (limited to 'kviewshell/plugins/djvu/libdjvu/DjVuDocument.h')
-rw-r--r-- | kviewshell/plugins/djvu/libdjvu/DjVuDocument.h | 1071 |
1 files changed, 1071 insertions, 0 deletions
diff --git a/kviewshell/plugins/djvu/libdjvu/DjVuDocument.h b/kviewshell/plugins/djvu/libdjvu/DjVuDocument.h new file mode 100644 index 00000000..418d0814 --- /dev/null +++ b/kviewshell/plugins/djvu/libdjvu/DjVuDocument.h @@ -0,0 +1,1071 @@ +//C- -*- C++ -*- +//C- ------------------------------------------------------------------- +//C- DjVuLibre-3.5 +//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun. +//C- Copyright (c) 2001 AT&T +//C- +//C- This software is subject to, and may be distributed under, the +//C- GNU General Public License, Version 2. The license should have +//C- accompanied the software or you may obtain a copy of the license +//C- from the Free Software Foundation at http://www.fsf.org . +//C- +//C- This program is distributed in the hope that it will be useful, +//C- but WITHOUT ANY WARRANTY; without even the implied warranty of +//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//C- GNU General Public License for more details. +//C- +//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library +//C- distributed by Lizardtech Software. On July 19th 2002, Lizardtech +//C- Software authorized us to replace the original DjVu(r) Reference +//C- Library notice by the following text (see doc/lizard2002.djvu): +//C- +//C- ------------------------------------------------------------------ +//C- | DjVu (r) Reference Library (v. 3.5) +//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. +//C- | The DjVu Reference Library is protected by U.S. Pat. No. +//C- | 6,058,214 and patents pending. +//C- | +//C- | This software is subject to, and may be distributed under, the +//C- | GNU General Public License, Version 2. The license should have +//C- | accompanied the software or you may obtain a copy of the license +//C- | from the Free Software Foundation at http://www.fsf.org . 
+//C- | +//C- | The computer code originally released by LizardTech under this +//C- | license and unmodified by other parties is deemed "the LIZARDTECH +//C- | ORIGINAL CODE." Subject to any third party intellectual property +//C- | claims, LizardTech grants recipient a worldwide, royalty-free, +//C- | non-exclusive license to make, use, sell, or otherwise dispose of +//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the +//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU +//C- | General Public License. This grant only confers the right to +//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to +//C- | the extent such infringement is reasonably necessary to enable +//C- | recipient to make, have made, practice, sell, or otherwise dispose +//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to +//C- | any greater extent that may be necessary to utilize further +//C- | modifications or combinations. +//C- | +//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY +//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF +//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
+//C- +------------------------------------------------------------------ +// +// $Id: DjVuDocument.h,v 1.10 2005/05/25 20:24:52 leonb Exp $ +// $Name: release_3_5_15 $ + +#ifndef _DJVUDOCUMENT_H +#define _DJVUDOCUMENT_H +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#if NEED_GNUG_PRAGMAS +# pragma interface +#endif + + +#include "DjVuPort.h" + +#ifdef HAVE_NAMESPACES +namespace DJVU { +# ifdef NOT_DEFINED // Just to fool emacs c++ mode +} +#endif +#endif + +class DjVmDoc; +class DjVmDir; +class DjVmDir0; +class DjVmNav; +class DjVuImage; +class DjVuFile; +class DjVuFileCache; +class DjVuNavDir; +class ByteStream; + +/** @name DjVuDocument.h + Files #"DjVuDocument.h"# and #"DjVuDocument.cpp"# contain implementation + of the \Ref{DjVuDocument} class - the ideal tool for opening, decoding + and saving DjVu single page and multi page documents. + + @memo DjVu document class. + @author Andrei Erofeev <eaf@geocities.com> + @version #$Id: DjVuDocument.h,v 1.10 2005/05/25 20:24:52 leonb Exp $# +*/ + +//@{ + +/** #DjVuDocument# provides convenient interface for opening, decoding + and saving back DjVu documents in single page and multi page formats. + + {\bf Input formats} + It can read multi page DjVu documents in either of the 4 formats: 2 + obsolete ({\em old bundled} and {\em old indexed}) and two new + ({\em new bundled} and {\em new indirect}). + + {\bf Output formats} + To encourage users to switch to the new formats, the #DjVuDocument# can + save documents back only in the new formats: {\em bundled} and + {\em indirect}. + + {\bf Conversion.} Since #DjVuDocument# can open DjVu documents in + an obsolete format and save it in any of the two new formats + ({\em new bundled} and {\em new indirect}), this class can be used for + conversion from obsolete formats to the new ones. Although it can also + do conversion between the new two formats, it's not the best way to + do it. Please refer to \Ref{DjVmDoc} for details. 
+ + {\bf Decoding.} #DjVuDocument# provides convenient interface for obtaining + \Ref{DjVuImage} corresponding to any page of the document. It uses + \Ref{DjVuFileCache} to do caching thus avoiding unnecessary multiple decoding of + the same page. The real decoding though is accomplished by \Ref{DjVuFile}. + + {\bf Messenging.} Being derived from \Ref{DjVuPort}, #DjVuDocument# + takes an active part in exchanging messages (requests and notifications) + between different parties involved in decoding. It reports (relays) + errors, progress information and even handles some requests for data (when + these requests deal with local files). + + Typical usage of #DjVuDocument# class in a threadless command line + program would be the following: + \begin{verbatim} + static const char file_name[]="/tmp/document.djvu"; + GP<DjVuDocument> doc=DjVuDocument::create_wait(file_name); + const int pages=doc->get_pages_num(); + for(int page=0;page<pages;page++) + { + GP<DjVuImage> dimg=doc->get_page(page); + // Do something + }; + \end{verbatim} + + {\bf Comments for the code above} + \begin{enumerate} + \item Since the document is assumed to be stored on the hard drive, + we don't have to cope with \Ref{DjVuPort}s and can pass + #ZERO# pointer to the \Ref{init}() function. #DjVuDocument# + can access local data itself. In the case of a plugin though, + one would have to implement his own \Ref{DjVuPort}, which + would handle requests for data arising when the document + is being decoded. + \item In a threaded program instead of calling the \Ref{init}() + function one can call \Ref{start_init}() and \Ref{stop_init}() + to initiate and interrupt initialization carried out in + another thread. This possibility of initializing the document + in another thread has been added specially for the plugin + because the initialization itself requires data, which is + not immediately available in the plugin. 
Thus, to prevent the + main thread from blocking, we perform initialization in a + separate thread. To check if the class is completely and + successfully initialized, use \Ref{is_init_ok}(). To see if + there was an error, use \Ref{is_init_failed}(). To + know when initialization is over (whether successfully or not), + use \Ref{is_init_complete}(). To wait for this to happen use + \Ref{wait_for_complete_init}(). Once again, all these things are + not required for single-threaded program. + + Another difference between single-threaded and multi-threaded + environments is that in a single-threaded program, the image is + fully decoded before it's returned. In a multi-threaded + application decoding starts in a separate thread, and the pointer + to the \Ref{DjVuImage} being decoded is returned immediately. + This has been done to enable progressive redisplay + in the DjVu plugin. Use communication mechanism provided by + \Ref{DjVuPort} and \Ref{DjVuPortcaster} to learn about progress + of decoding. Or try #dimg->wait_for_complete_decode()# to wait + until the decoding ends. + \item See Also: \Ref{DjVuFile}, \Ref{DjVuImage}, \Ref{GOS}. + \end{enumerate} + + {\bf Initialization} + As mentioned above, the #DjVuDocument# can go through several stages + of initialization. The functionality is gradually added while it passes + one stage after another: + \begin{enumerate} + \item First of all, immediately after the object is created \Ref{init}() + or \Ref{start_init}() functions must be called. {\bf Nothing} + will work until this is done. \Ref{init}() function will not + return until the initialization is complete. You need to make + sure, that enough data is available. {\bf Do not call \Ref{init}() + in the plugin}. \Ref{start_init}() will start initialization + in another thread. Use \Ref{stop_init}() to interrupt it. + Use \Ref{is_init_complete}() to check the initialization progress. + Use \Ref{wait_for_complete_init}() to wait for init to finish. 
+ \item The first thing the initializing code learns about the document + is its type (#BUNDLED#, #INDIRECT#, #OLD_BUNDLED# or #OLD_INDEXED#). + As soon as it happens, document flags are changed and + #notify_doc_flags_changed()# request is sent through the + communication mechanism provided by \Ref{DjVuPortcaster}. + \item After the document type becomes known, the initializing code + proceeds with learning the document structure. Gradually the + flags are updated with values: + \begin{itemize} + \item #DOC_DIR_KNOWN#: Contents of the document became known. + This is meaningful for #BUNDLED#, #OLD_BUNDLED# and + #INDIRECT# documents only. + \item #DOC_NDIR_KNOWN#: Contents of the document navigation + directory became known. This is meaningful for old-style + documents (#OLD_BUNDLED# and #OLD_INDEXED#) only + \item #DOC_INIT_OK# or #DOC_INIT_FAILED#: + The initializating code finished. + \end{itemize} + \end{enumerate} */ + +class DjVuDocument : public DjVuPort +{ +public: + /** Flags describing the document initialization state. + \begin{itemize} + \item #DOC_TYPE_KNOWN#: The type of the document has been learnt. + \item #DOC_DIR_KNOWN#: Contents of the document became known. + This is meaningful for #BUNDLED#, #OLD_BUNDLED# and + #INDIRECT# documents only. + \item #DOC_NDIR_KNOWN#: Contents of the document navigation + directory became known. This is meaningful for old-style + documents (#OLD_BUNDLED# and #OLD_INDEXED#) only + \item #DOC_INIT_OK#: The initialization has completed successfully. + \item #DOC_INIT_FAILED#: The initialization failed. + \end{itemize} */ + enum DOC_FLAGS { DOC_TYPE_KNOWN=1, DOC_DIR_KNOWN=2, + DOC_NDIR_KNOWN=4, DOC_INIT_OK=8, + DOC_INIT_FAILED=16 }; + /** Specifies the format of #DjVuDocument#. There are currently 4 DjVu + multipage formats recognized by the library. Two of them are obsolete + and should not be used. 
+ \begin{enumerate} + \item #OLD_BUNDLED# - Obsolete bundled format + \item #OLD_INDEXED# - Obsolete multipage format where every page + is stored in a separate file and "includes" (by means + of an #INCL# chunk) the file with the document directory. + \item #SINGLE_PAGE# - Single page document. Basically a file + with either #FORM:DJVU# or #FORM:IW44# and no multipage + information. For example, #OLD_INDEXED# documents with + document directory do not qualify even if they contain only + one page. + \item #BUNDLED# - Currently supported bundled format + \item #INDIRECT# - Currently supported "expanded" format, where + every page and component is stored in a separate file. There + is also a {\em top-level} file with the document directory. + \end{enumerate} */ + enum DOC_TYPE { OLD_BUNDLED=1, OLD_INDEXED, BUNDLED, INDIRECT, + SINGLE_PAGE, UNKNOWN_TYPE }; + enum THREAD_FLAGS { STARTED=1, FINISHED=2 }; + +protected: + /** Default creator. Please call functions \Ref{init}() or + \Ref{start_init}() before you start working with the #DjVuDocument#. + */ + DjVuDocument(void); +public: + + /// Virtual Destructor + virtual ~DjVuDocument(void); + + /** Initializes the #DjVuDocument# object using an existing document. + This function should be called once after creating the object. + The #url# should point to the real data, and the creator of the + document should be ready to return this data to the document + if it's not stored locally (in which case #DjVuDocument# can + access it itself). + + {\bf Initializing thread} + In a single-threaded application, the #start_init()# function performs + the complete initialization of the #DjVuDocument# before it returns. + In a multi-threaded application, though, it initializes some internal + variables, requests data for the document and starts a new + {\em initializing} thread, which is responsible for determining the + document type and structure and completing the initialization + process. 
This additional complication is justified in the case of + the DjVu plugin because performing initialization requires data and + in the plugin the data can be supplied by the main thread only. + Thus, if the initialization was completed by the main thread, the + plugin would run out of data and block. + + {\bf Stages of initialization} + Immediately after the #start_init()# function terminates, the + #DjVuDocument# object is ready for use. Its functionality will + not be complete (until the initializing thread finishes), but + the object is still very useful. Such functions as \Ref{get_page}() + or \Ref{get_djvu_file}() or \Ref{id_to_url}() may be called + before the initializing thread completes. This allows the DjVu + plugin start decoding as soon as possible without waiting for + all data to arrive. + + To query the current stage of initialization you can use + \Ref{get_doc_flags}() function or listen to the + #notify_doc_flags_changed()# notifications distributed with the help + of \Ref{DjVuPortcaster}. To wait for the initialization to + complete use \Ref{wait_for_complete_init}(). To stop initialization + call \Ref{stop_init}(). + + {\bf Querying data} + The query for data is done using the communication mechanism + provided by \Ref{DjVuPort} and \Ref{DjVuPortcaster}. If #port# + is not #ZERO#, then the request for data will be forwarded to it. + If it {\bf is} #ZERO# then #DjVuDocument# will create an internal + instance of \Ref{DjVuSimplePort} and will use it to access local + files and report errors to #stderr#. In short, if the document + file is stored on the local hard disk, and you're OK about reporting + errors to #stderr#, you may pass #ZERO# pointer to \Ref{DjVuPort} + as #DjVuDocument# can take care of this situation by itself. + + {\bf The URL} + Depending on the document type the #url# should point to: + \begin{itemize} + \item {\bf Old bundled} and {\bf New bundled} formats: to the + document itself. 
+ \item {\bf Old indexed} format: to any page of the document. + \item {\bf New indirect} format: to the top-level file of the + document. If (like in the {\em old indexed} format) you + point the #url# to a page, the page {\em will} be decoded, + but it will {\em not} be recognized to be part of the + document. + \end{itemize} + + @param url The URL pointing to the document. If the document is + in a {\em bundled} format then the URL should point to it. + If the document is in the {\em old indexed} format then + URL may point to any page of this document. For {\em new + indirect} format the URL should point to the top-level + file of the document. + @param port If not #ZERO#, all requests and notifications will + be sent to it. Otherwise #DjVuDocument# will create an internal + instance of \Ref{DjVuSimplePort} for these purposes. + It's OK to make it #ZERO# if you're writing a command line + tool, which should work with files on the hard disk only + because #DjVuDocument# can access such files itself. + @param cache It's used to cache decoded \Ref{DjVuFile}s and + is actually useful in the plugin only. */ + void start_init(const GURL & url, GP<DjVuPort> port=0, + DjVuFileCache * cache=0); + + /** This creates a DjVuDocument without initializing it. */ + static GP<DjVuDocument> create_noinit(void) {return new DjVuDocument;} + + /** Create a version of DjVuDocument which has finished initializing. */ + static GP<DjVuDocument> create_wait( + const GURL &url, GP<DjVuPort> xport=0, DjVuFileCache * const xcache=0); + + /** Create a version of DjVuDocument which has begun initializing. */ + static GP<DjVuDocument> create( + const GURL &url, GP<DjVuPort> xport=0, DjVuFileCache * const xcache=0); + + /** Create a version of DjVuDocument which has begun initializing. */ + static GP<DjVuDocument> create( + GP<DataPool> pool, GP<DjVuPort> xport=0, DjVuFileCache * const xcache=0); + + /** Create a version of DjVuDocument which has begun initializing. 
*/ + static GP<DjVuDocument> create( + const GP<ByteStream> &bs, GP<DjVuPort> xport=0, + DjVuFileCache * const xcache=0); + + /** Call this function when you don't need the #DjVuDocument# any more. + In a multi-threaded environment it will stop initialization + thread, if it is currently running. {\bf You will not be able + to start the initialization again. Thus, after calling this + function the document should not be used any more}. */ + void stop_init(void); + + /** Initializes the document. + + Contrary to \Ref{start_init}(), which just starts the initialization + thread in a multi-threaded environment, this function does not + return until the initialization completes (either successfully or + not). Basically, it calls \Ref{start_init}() and then + \Ref{wait_for_complete_init}(). + */ + void init(const GURL & url, GP<DjVuPort> port=0, + DjVuFileCache * cache=0); + + /** Returns #TRUE# if the initialization thread finished (does not + matter successfully or not). As soon as it happens, the document + becomes completely initialized and its every function should work + properly. Please refer to the description of \Ref{init}() function + and of the #DjVuDocument# class to learn about the initializing + stages. + + To wait for the initialization to complete use + \Ref{wait_for_complete_init}() function. + + To query the initialization stage use \Ref{get_doc_flags}() function. + + To learn whether initialization was successful or not, + use \Ref{is_init_ok}() and \Ref{is_init_failed}(). + + {\bf Note:} In a single threaded application the initialization + completes before the \Ref{init}() function returns. */ + bool is_init_complete(void) const; + + /** Returns #TRUE# if the initialization thread finished successfully. + + See \Ref{is_init_complete}() and \Ref{wait_for_complete_init}() + for more details. */ + bool is_init_ok(void) const; + /** Forces compression with the next save_as function. 
*/ + void set_needs_compression(void); + /** Returns #TRUE# if there are uncompressed pages in this document. */ + bool needs_compression(void) const; + /** Returns #TRUE# if this file must be renamed before saving. */ + bool needs_rename(void) const; + /** Returns #TRUE# if this document can be compressed. */ + bool can_compress(void) const; + + /** Returns #TRUE# if the initialization thread failed. + + See \Ref{is_init_complete}() and \Ref{wait_for_complete_init}() + for more details. */ + bool is_init_failed(void) const; + + /** If the document has already learnt its type, the function will + return it: #DjVuDocument::OLD_BUNDLED# or + #DjVuDocument::OLD_INDEXED# or #DjVuDocument::SINGLE_PAGE# or + #DjVuDocument::BUNDLED# or #DjVuDocument::INDIRECT#. The first + two formats are obsolete. Otherwise (if the type is unknown yet), + #UNKNOWN_TYPE# will be returned. + + {\bf Note:} To check the stage of the document initialization + use \Ref{get_doc_flags}() or \Ref{is_init_complete}() functions. To + wait for the initialization to complete use \Ref{wait_for_complete_init}(). + For single threaded applications the initialization completes + before the \Ref{init}() function returns. */ + int get_doc_type(void) const; + + /** Returns the document flags. The flags describe the degree to which + the #DjVuDocument# object is initialized. Every time the flags + are changed, a #notify_doc_flags_changed()# notification is + distributed using the \Ref{DjVuPortcaster} communication + mechanism. + + {\bf Note:} To wait for the initialization to complete use + \Ref{wait_for_complete_init}(). For single threaded applications + the initialization completes before the \Ref{init}() function + returns. */ + long get_doc_flags(void) const; + + /** Returns #TRUE# if the document is in bundled format (either in + #DjVuDocument::OLD_BUNDLED# or #DjVuDocument::BUNDLED# formats). 
*/ + bool is_bundled(void) const; + + /// Returns the URL passed to the \Ref{init}() function + GURL get_init_url(void) const; + + /// Returns a listing of id's used by this document. + GList<GUTF8String> get_id_list(void); + + /// Fill the id's into a GMap. + void map_ids( GMap<GUTF8String,void *> &map); + + /** Returns data corresponding to the URL passed to the \Ref{init}() + function. + + {\bf Note:} The pointer returned is guaranteed to be non-#ZERO# + only after the #DjVuDocument# learns its type (passes through + the first stage of initialization process). Please refer to + \Ref{init}() for details. */ + GP<DataPool> get_init_data_pool(void) const; + + /** @name Accessing pages */ + //@{ + /** Returns the number of pages in the document. If there is still + insufficient information about the document structure (initialization + has not finished yet), #1# will be returned. Please refer to + \Ref{init}() for details. */ + int get_pages_num(void) const; + + /** Translates the page number to the full URL of the page. This URL + is "artificial" for the {\em bundled} formats and is obtained + by appending the page name to the document's URL honoring possible + #;# and #?# in it. Negative page number has a special meaning for + #OLD_INDEXED# documents: it points to the URL, which the + #DjVuDocument# has been initialized with. For other formats this + is the same as page #0#. + + The function tries it best to map the page number to the URL. + Although, if the document structure has not been fully discovered + yet, an empty URL will be returned. Use \Ref{wait_for_complete_init}() + to wait until the document initialization completes. Refer to + \Ref{init}() for details. + + Depending on the document format, the function assumes, that there + is enough information to complete the request when: + \begin{itemize} + \item #OLD_INDEXED#: If #page_num<0#, #DOC_TYPE_KNOWN# flag must + be set. Otherwise #DOC_NDIR_KNOWN# must be set. 
+ \item #OLD_BUNDLED#: If #page_num=0#, #DOC_DIR_KNOWN# flag must + be set. Otherwise #DOC_NDIR_KNOWN# flag must be set. + \item #INDIRECT# and #BUNDLED#: #DOC_DIR_KNOWN# flag must be set. + \end{itemize} */ + GURL page_to_url(int page_num) const; + /// Translate the page number to id... + GUTF8String page_to_id(int page_num) const + { return url_to_id(page_to_url(page_num)); } + /** Translates the page URL back to page number. Returns #-1# if the + page is not in the document or the document's structure + has not been learnt yet. + + Depending on the document format, the function starts working + properly as soon as: + \begin{itemize} + \item #OLD_INDEXED# and #OLD_BUNDLED# and #SINGLE_PAGE#: + #DOC_NDIR_KNOWN# is set + \item #INDIRECT# and #BUNDLED#: #DOC_DIR_KNOWN# is set. + \end{itemize} */ + int url_to_page(const GURL & url) const; + /// Map the specified url to its id. + GUTF8String url_to_id(const GURL &url) const + { return url.fname(); } + + /** Translates the textual ID to the complete URL if possible. + + Depending on the document format the translation is done in the + following way: + \begin{itemize} + \item For #BUNDLED# and #INDIRECT# documents the function + scans the \Ref{DjVmDir} (the document directory) and + matches the ID against: + \begin{enumerate} + \item File ID from the \Ref{DjVmDir} + \item File name from the \Ref{DjVmDir} + \item File title from the \Ref{DjVmDir} + \end{enumerate} + Then for #BUNDLED# document the URL is obtained by + appending the #name# of the found file to the document's + URL. + + For #INDIRECT# documents the URL is obtained by + appending the #name# of the found file to the URL of + the directory containing the document. + \item For #OLD_BUNDLED# documents the function compares the ID + with internal name of every file inside the bundle and + composes an artificial URL by appending the file name to + the document's URL. 
+ \item For #OLD_INDEXED# or #SINGLE_PAGE# documents the function + composes the URL by appending the ID to the URL of the + directory containing the document. + \end{itemize} + + If information obtained by the initialization thread is not + sufficient yet, the #id_to_url()# may return an empty URL. + Depending on the document type, the information is sufficient when + \begin{itemize} + \item #BUNDLED# and #INDIRECT#: #DOC_DIR_KNOWN# flag is set. + \item #OLD_BUNDLED# and #OLD_INDEXED# and #SINGLE_PAGE#: + #DOC_TYPE_KNOWN# flag is set. + \end{itemize} */ + GURL id_to_url(const GUTF8String &id) const; + /// Find out which page this id is... + int id_to_page(const GUTF8String &id) const + { return url_to_page(id_to_url(id)); } + + /** Returns \Ref{GP} pointer to \Ref{DjVuImage} corresponding to page + #page_num#. If caching is enabled, and there is a {\em fully decoded} + \Ref{DjVuFile} in the cache, the image will be reused and will + be returned fully decoded. Otherwise, if multi-threaded behavior + is allowed, and #sync# is set to #FALSE#, the decoding will be + started in a separate thread, which enables to do progressive + redisplay. Thus, in this case the image returned may be partially + decoded. + + Negative #page_num# has a special meaning for the {\em old indexed} + multipage documents: the #DjVuDocument# will start decoding of the + URL with which it has been initialized. For other formats page + #-1# is the same as page #0#. + + #DjVuDocument# can also connect the created page to the specified + #port# {\em before starting decoding}. This option will allow + the future owner of \Ref{DjVuImage} to receive all messages and + requests generated during its decoding. 
+ + If this function is called before the document's structure becomes + known (the initialization process completes), the \Ref{DjVuFile}, + which the returned image will be attached to, will be assigned a + temporary artificial URL, which will be corrected as soon as enough + information becomes available. The trick prevents the main thread + from blocking and in some cases helps to start decoding earlier. + The URL is corrected and decoding will start as soon as + #DjVuDocument# passes some given stages of initialization and + \Ref{page_to_url}(), \Ref{id_to_url}() functions start working + properly. Please look through their description for details. + + {\bf Note:} To wait for the initialization to complete use + \Ref{wait_for_complete_init}(). For single threaded applications + the initialization completes before the \Ref{init}() function + returns. + + @param page_num Number of the page to be decoded + @param sync When set to #TRUE# the function will not return + until the page is completely decoded. Otherwise, + in a multi-threaded program, this function will + start decoding in a new thread and will return + a partially decoded image. Refer to + \Ref{DjVuImage::wait_for_complete_decode}() and + \Ref{DjVuFile::is_decode_ok}(). + @param port A pointer to \Ref{DjVuPort}, that the created image + will be connected to. */ + GP<DjVuImage> get_page(int page_num, bool sync=true, DjVuPort * port=0) const; + GP<DjVuImage> get_page(int page_num, bool sync=true, DjVuPort * port=0) + { return const_cast<const DjVuDocument *>(this)->get_page(page_num,sync,port); } + + /** Returns \Ref{GP} pointer to \Ref{DjVuImage} corresponding to the + specified ID. This function behaves exactly as the #get_page()# + function above. The only thing worth mentioning here is how the #ID# + parameter is treated. + + First of all the function checks, if the ID contains a number. + If so, it just calls the #get_page()# function above. 
If ID is + #ZERO# or just empty, page number #-1# is assumed. Otherwise + the ID is translated to the URL using \Ref{id_to_url}(). */ + GP<DjVuImage> get_page(const GUTF8String &id, bool sync=true, DjVuPort * port=0); + + /** Returns \Ref{DjVuFile} corresponding to the specified page. + Normally it translates the page number to the URL using + \Ref{page_to_url}() and then creates \Ref{DjVuFile} initializing + it with data from the URL. + + The behavior becomes different, though in the case when the + document structure is unknown at the moment this function is called. + In this situation it invents a temporary URL, creates a + \Ref{DjVuFile}, initializes it with this URL and returns + immediately. The caller may start decoding the file right away + (if necessary). The decoding will block but will automatically + continue as soon as enough information is collected about the + document. This trick should be quite transparent to the user and + helps to prevent the main thread from blocking. The decoding will + unblock and this function will stop using this "trick" as soon + as #DjVuDocument# passes some given stages of initialization and + \Ref{page_to_url}(), \Ref{id_to_url}() functions start working + properly. + + If #dont_create# is #TRUE# the function will return the file + only if it already exists. + + {\bf Note:} To wait for the initialization to complete use + \Ref{wait_for_complete_init}(). For single threaded applications + the initialization completes before the \Ref{init}() function + returns. */ + GP<DjVuFile> get_djvu_file(int page_num, bool dont_create=false) const; + GP<DjVuFile> get_djvu_file(int page_num, bool dont_create=false) + { return const_cast<const DjVuDocument *>(this)->get_djvu_file(page_num,dont_create); } + + + /** Returns \Ref{DjVuFile} corresponding to the specified ID. + This function behaves exactly as the #get_djvu_file()# function + above. The only thing worth mentioning here is how the #ID# + parameter is treated. 
+ + First off, \Ref{id_to_url}() is called. If not successful, + the function checks, if the ID contains a number. + If so, it just calls the #get_djvu_file()# function above. If ID is + #ZERO# or just empty, page number #-1# is assumed. + + If #dont_create# is #TRUE# the function will return the file + only if it already exists. */ + GP<DjVuFile> get_djvu_file(const GUTF8String &id, bool dont_create=false); + GP<DjVuFile> get_djvu_file(const GURL &url, bool dont_create=false); + /** Returns a \Ref{DataPool} containing one chunk #TH44# with + the encoded thumbnail for the specified page. The function + first looks for thumbnails embedded in the document and if + it fails to find one, it decodes the required page and creates + the thumbnail on the fly (unless #dont_decode# is true). + + {\bf Note:} It may happen that the returned \Ref{DataPool} will + not contain all the data you need. In this case you will need + to install a trigger into the \Ref{DataPool} to learn when the + data actually arrives. */ + virtual GP<DataPool> get_thumbnail(int page_num, bool dont_decode); + /** Will return gamma correction, which was used when creating + thumbnail images. If you need other gamma correction, you will + need to correct the thumbnails again. */ + float get_thumbnails_gamma(void) const; + //@} + + /** Waits until the document initialization process finishes. + It can finish either successfully or not. Use \Ref{is_init_ok}() + and \Ref{is_init_failed}() to learn the result code. + + As described in \Ref{start_init}(), for multi-threaded applications the + initialization is carried out in parallel with the main thread. + This function blocks the calling thread until the initializing + thread reads enough data, receives information about the document + format and exits. This function returns #true# if the + initialization is successful. You can use \Ref{get_doc_flags}() or + \Ref{is_init_complete}() to check more precisely the degree of + initialization. 
Use \Ref{stop_init}() to interrupt initialization. */ + bool wait_for_complete_init(void); + + /** Wait until we know the number of pages and return. */ + int wait_get_pages_num(void) const; + + /// Returns cache being used. + DjVuFileCache * get_cache(void) const; + + /** @name Saving document to disk */ + //@{ + /** Returns pointer to the \Ref{DjVmDoc} class, which can save the + document contents on the hard disk in one of the two new formats: + {\em bundled} and {\em indirect}. You may also want to look + at \Ref{write}() and \Ref{expand}() if you are interested in + how to save the document. + + {\bf Plugin Warning}. This function will read contents of the whole + document. Thus, if you call it from the main thread (the thread, + which transfers data from Netscape), the plugin will block. */ + GP<DjVmDoc> get_djvm_doc(void); + /** Saves the document in the {\em new bundled} format. All the data + is "bundled" into one file and this file is written into the + passed stream. + + If #force_djvm# is #TRUE# then even one page documents will be + saved in the #DJVM BUNDLED# format (inside a #FORM:DJVM#). + + {\bf Plugin Warning}. This function will read contents of the whole + document. Thus, if you call it from the main thread (the thread, + which transfers data from Netscape), the plugin will block. */ + virtual void write(const GP<ByteStream> &str, bool force_djvm=false); + /** Always save as bundled, renaming any files conflicting with + the names in the supplied GMap. */ + virtual void write(const GP<ByteStream> &str, + const GMap<GUTF8String,void *> &reserved); + /** Saves the document in the {\em new indirect} format when every + page and component are stored in separate files. This format + is ideal for web publishing because it allows direct access to + any page and component. In addition to it, a top-level file + containing the list of all components will be created.
To view + the document later in the plugin or in the viewer one should + load the top-level file. + + {\bf Plugin Warning}. This function will read contents of the whole + document. Thus, if you call it from the main thread (the thread, + which transfers data from Netscape), the plugin will block. + + @param codebase - Name of the directory which the document should + be expanded into. + @param idx_name - Name of the top-level file containing the document + directory (basically, list of all files composing the document). + */ + void expand(const GURL &codebase, const GUTF8String &idx_name); + /** This function can be used instead of \Ref{write}() and \Ref{expand}(). + It allows saving the document either in the new #BUNDLED# format + or in the new #INDIRECT# format depending on the value of parameter + #bundled#. + + Depending on the document's type, the meaning of #where# is: + \begin{itemize} + \item For #BUNDLED# documents this is the name of the file + \item For #INDIRECT# documents this is the name of the top-level + index file. All document files will be saved into the + same directory where the index file will reside. + \end{itemize} */ + virtual void save_as(const GURL &where, const bool bundled=0); + //@} + /** Returns pointer to the internal directory of the document, if it + is in one of the new formats: #BUNDLED# or #INDIRECT#. + Otherwise (if the document is single-page or in an obsolete + format), an exception is thrown. + + #ZERO# will be returned if the initializing thread has not + learnt enough information about the document (#DOC_DIR_KNOWN# has + not been set yet). Check \Ref{is_init_complete}() and \Ref{init}() + for details. */ + GP<DjVmDir> get_djvm_dir(void) const; + /** Returns pointer to the document bookmarks. + This applies to #BUNDLED# and #INDIRECT# documents; for other + document types #ZERO# is returned. + + #ZERO# will also be returned if the initializing thread has not + learnt enough information about the document (#DOC_DIR_KNOWN# has + not been set yet).
Check \Ref{is_init_complete}() and \Ref{init}() + for details. */ + GP<DjVmNav> get_djvm_nav(void) const; + /** Returns pointer to the internal directory of the document, if it + is in obsolete #OLD_BUNDLED# format. + + #ZERO# will also be returned if the initializing thread has not + learnt enough information about the document (#DOC_DIR_KNOWN# has + not been set yet). Check \Ref{is_init_complete}() and \Ref{init}() + for details. */ + GP<DjVmDir0> get_djvm_dir0(void) const; + /** Returns pointer to {\em navigation directory} of the document. + The navigation directory is a DjVu file containing only one + chunk #NDIR# inside a #FORM:DJVI# with the list of all + document pages. */ + GP<DjVuNavDir> get_nav_dir(void) const; + + /// Create a complete DjVuXML file. + void writeDjVuXML(const GP<ByteStream> &gstr_out,int flags) const; + + /// Returns TRUE if #class_name# is #"DjVuDocument"# or #"DjVuPort"# + virtual bool inherits(const GUTF8String &class_name) const; + + /// Converts the specified id to a URL. 
+ virtual GURL id_to_url(const DjVuPort * source, const GUTF8String &id); + virtual GP<DjVuFile> id_to_file(const DjVuPort * source, const GUTF8String &id); + virtual GP<DataPool> request_data(const DjVuPort * source, const GURL & url); + virtual void notify_file_flags_changed(const DjVuFile * source, + long set_mask, long clr_mask); + + virtual GList<GURL> get_url_names(void); + virtual void set_recover_errors(ErrorRecoveryAction=ABORT); + virtual void set_verbose_eof(bool=true); + + static void set_compress_codec( + void (*codec)(GP<ByteStream> &, const GURL &where, bool bundled)); + + static void set_import_codec( + void (*codec)(GP<DataPool> &,const GURL &url,bool &, bool &)); + +protected: + static void (*djvu_import_codec) ( + GP<DataPool> &pool, const GURL &url,bool &needs_compression, bool &needs_rename ); + static void (*djvu_compress_codec) ( + GP<ByteStream> &bs, const GURL &where, bool bundled); + virtual GP<DjVuFile> url_to_file(const GURL & url, bool dont_create=false) const; + GURL init_url; + GP<DataPool> init_data_pool; + GP<DjVmDir> djvm_dir; // New-style DjVm directory + GP<DjVmNav> djvm_nav; + int doc_type; + bool needs_compression_flag; + bool can_compress_flag; + bool needs_rename_flag; + + + + bool has_url_names; + GCriticalSection url_names_lock; + GList<GURL> url_names; + ErrorRecoveryAction recover_errors; + bool verbose_eof; +public: + class UnnamedFile; // This really should be protected ... + class ThumbReq; // This really should be protected ... +protected: + bool init_started; + GSafeFlags flags; + GSafeFlags init_thread_flags; + DjVuFileCache * cache; + GP<DjVuSimplePort> simple_port; + + GP<DjVmDir0> djvm_dir0; // Old-style DjVm directory + GP<DjVuNavDir> ndir; // Old-style navigation directory + GUTF8String first_page_name;// For OLD_BUNDLED docs only + + // The following is used in init() and destructor to query NDIR + // DO NOT USE FOR ANYTHING ELSE. THE FILE IS ZEROED IMMEDIATELY + // AFTER IT'S NO LONGER NEEDED. 
If you don't zero it, ~DjVuDocument() + // will kill it, which is a BAD thing if the file's already in cache. + GP<DjVuFile> ndir_file; + + GPList<UnnamedFile> ufiles_list; + GCriticalSection ufiles_lock; + + GPList<ThumbReq> threqs_list; + GCriticalSection threqs_lock; + + GP<DjVuDocument> init_life_saver; + + static const float thumb_gamma; + + // Reads document contents in another thread trying to determine + // its type and structure + GThread init_thr; + static void static_init_thread(void *); + void init_thread(void); + + void check() const; + + void process_threqs(void); + GP<ThumbReq> add_thumb_req(const GP<ThumbReq> & thumb_req); + + void add_to_cache(const GP<DjVuFile> & f); + void check_unnamed_files(void); + GUTF8String get_int_prefix(void) const; + void set_file_aliases(const DjVuFile * file); + GURL invent_url(const GUTF8String &name) const; +}; + +class DjVuDocument::UnnamedFile : public GPEnabled +{ +public: + enum { ID, PAGE_NUM }; + int id_type; + GUTF8String id; + int page_num; + GURL url; + GP<DjVuFile> file; + GP<DataPool> data_pool; +protected: + UnnamedFile(int xid_type, const GUTF8String &xid, int xpage_num, const GURL & xurl, + const GP<DjVuFile> & xfile) : + id_type(xid_type), id(xid), page_num(xpage_num), url(xurl), file(xfile) {} + friend class DjVuDocument; +}; + +class DjVuDocument::ThumbReq : public GPEnabled +{ +public: + int page_num; + GP<DataPool> data_pool; + + // Either of the next two blocks should present + GP<DjVuFile> image_file; + + int thumb_chunk; + GP<DjVuFile> thumb_file; +protected: + ThumbReq(int xpage_num, const GP<DataPool> & xdata_pool) : + page_num(xpage_num), data_pool(xdata_pool) {} + friend class DjVuDocument; +}; + +inline void +DjVuDocument::init(const GURL &url, GP<DjVuPort> port, DjVuFileCache *cache) +{ + start_init(url,port,cache); + wait_for_complete_init(); +} + +inline GP<DjVuDocument> +DjVuDocument::create( + const GURL &url, GP<DjVuPort> xport, DjVuFileCache * const xcache) +{ + DjVuDocument *doc=new 
DjVuDocument; + GP<DjVuDocument> retval=doc; + doc->start_init(url,xport,xcache); + return retval; +} + +inline bool +DjVuDocument::is_init_complete(void) const +{ + return (flags & (DOC_INIT_OK | DOC_INIT_FAILED))!=0; +} + +inline bool +DjVuDocument::is_init_ok(void) const +{ + return (flags & DOC_INIT_OK)!=0; +} + +inline void +DjVuDocument::set_needs_compression(void) +{ + needs_compression_flag=true; +} + +inline bool +DjVuDocument::needs_compression(void) const +{ + return needs_compression_flag; +} + +inline bool +DjVuDocument::needs_rename(void) const +{ + return needs_rename_flag; +} + +inline bool +DjVuDocument::can_compress(void) const +{ + return can_compress_flag; +} + +inline bool +DjVuDocument::is_init_failed(void) const +{ + return (flags & DOC_INIT_FAILED)!=0; +} + +inline int +DjVuDocument::get_doc_type(void) const { return doc_type; } + +inline long +DjVuDocument::get_doc_flags(void) const { return flags; } + +inline bool +DjVuDocument::is_bundled(void) const +{ + return doc_type==BUNDLED || doc_type==OLD_BUNDLED; +} + +inline GURL +DjVuDocument::get_init_url(void) const { return init_url; } + +inline GP<DataPool> +DjVuDocument::get_init_data_pool(void) const { return init_data_pool; } + +inline bool +DjVuDocument::inherits(const GUTF8String &class_name) const +{ + return + (GUTF8String("DjVuDocument") == class_name) || + DjVuPort::inherits(class_name); +// !strcmp("DjVuDocument", class_name) || +// DjVuPort::inherits(class_name); +} + +inline float +DjVuDocument::get_thumbnails_gamma(void) const +{ + return thumb_gamma; +} + +inline DjVuFileCache * +DjVuDocument::get_cache(void) const +{ + return cache; +} + +inline GP<DjVmDir> +DjVuDocument::get_djvm_dir(void) const +{ + if (doc_type==SINGLE_PAGE) + G_THROW( ERR_MSG("DjVuDocument.no_dir") ); + if (doc_type!=BUNDLED && doc_type!=INDIRECT) + G_THROW( ERR_MSG("DjVuDocument.obsolete") ); + return djvm_dir; +} + +inline GP<DjVmNav> +DjVuDocument::get_djvm_nav(void) const +{ + if (doc_type==BUNDLED || 
doc_type==INDIRECT) + return djvm_nav; + return 0; +} + +inline GP<DjVmDir0> +DjVuDocument::get_djvm_dir0(void) const +{ + if (doc_type!=OLD_BUNDLED) + G_THROW( ERR_MSG("DjVuDocument.old_bundle") ); + return djvm_dir0; +} + +inline GP<DjVuNavDir> +DjVuDocument::get_nav_dir(void) const +{ + return ndir; +} + +inline void +DjVuDocument::set_recover_errors(ErrorRecoveryAction recover) +{ + recover_errors=recover; +} + +inline void +DjVuDocument::set_verbose_eof(bool verbose) +{ + verbose_eof=verbose; +} + +//@} + + +#ifdef HAVE_NAMESPACES +} +# ifndef NOT_USING_DJVU_NAMESPACE +using namespace DJVU; +# endif +#endif +#endif |