summaryrefslogtreecommitdiffstats
path: root/kviewshell/plugins/djvu/libdjvu/DjVuText.h
diff options
context:
space:
mode:
Diffstat (limited to 'kviewshell/plugins/djvu/libdjvu/DjVuText.h')
-rw-r--r--kviewshell/plugins/djvu/libdjvu/DjVuText.h281
1 files changed, 281 insertions, 0 deletions
diff --git a/kviewshell/plugins/djvu/libdjvu/DjVuText.h b/kviewshell/plugins/djvu/libdjvu/DjVuText.h
new file mode 100644
index 00000000..61ee3667
--- /dev/null
+++ b/kviewshell/plugins/djvu/libdjvu/DjVuText.h
@@ -0,0 +1,281 @@
+//C- -*- C++ -*-
+//C- -------------------------------------------------------------------
+//C- DjVuLibre-3.5
+//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun.
+//C- Copyright (c) 2001 AT&T
+//C-
+//C- This software is subject to, and may be distributed under, the
+//C- GNU General Public License, Version 2. The license should have
+//C- accompanied the software or you may obtain a copy of the license
+//C- from the Free Software Foundation at http://www.fsf.org .
+//C-
+//C- This program is distributed in the hope that it will be useful,
+//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
+//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//C- GNU General Public License for more details.
+//C-
+//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
+//C- distributed by Lizardtech Software. On July 19th 2002, Lizardtech
+//C- Software authorized us to replace the original DjVu(r) Reference
+//C- Library notice by the following text (see doc/lizard2002.djvu):
+//C-
+//C- ------------------------------------------------------------------
+//C- | DjVu (r) Reference Library (v. 3.5)
+//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
+//C- | The DjVu Reference Library is protected by U.S. Pat. No.
+//C- | 6,058,214 and patents pending.
+//C- |
+//C- | This software is subject to, and may be distributed under, the
+//C- | GNU General Public License, Version 2. The license should have
+//C- | accompanied the software or you may obtain a copy of the license
+//C- | from the Free Software Foundation at http://www.fsf.org .
+//C- |
+//C- | The computer code originally released by LizardTech under this
+//C- | license and unmodified by other parties is deemed "the LIZARDTECH
+//C- | ORIGINAL CODE." Subject to any third party intellectual property
+//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
+//C- | non-exclusive license to make, use, sell, or otherwise dispose of
+//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
+//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
+//C- | General Public License. This grant only confers the right to
+//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
+//C- | the extent such infringement is reasonably necessary to enable
+//C- | recipient to make, have made, practice, sell, or otherwise dispose
+//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
+//C- | any greater extent that may be necessary to utilize further
+//C- | modifications or combinations.
+//C- |
+//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
+//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
+//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+//C- +------------------------------------------------------------------
+//
+// $Id: DjVuText.h,v 1.10 2003/11/07 22:08:21 leonb Exp $
+// $Name: release_3_5_15 $
+
+#ifndef _DJVUTEXT_H
+#define _DJVUTEXT_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#if NEED_GNUG_PRAGMAS
+# pragma interface
+#endif
+
+
+
+/** @name DjVuText.h
+
+ Files #"DjVuText.h"# and #"DjVuText.cpp"# implement the mechanism for
+ text in DjVuImages.
+
+ This file implements annotations understood by the DjVu plugins
+ and encoders.
+
+
+ using: contents of #TXT*# chunks.
+
+ Contents of the #FORM:TEXT# should be passed to \Ref{DjVuText::decode}()
+ for parsing, which initializes \Ref{DjVuText::TXT}
+ and fills them with decoded data.
+ @memo Implements support for DjVuImage hidden text.
+ @author Andrei Erofeev <eaf@geocities.com>
+ @version
+ #$Id: DjVuText.h,v 1.10 2003/11/07 22:08:21 leonb Exp $# */
+//@{
+
+
+#include "GMapAreas.h"
+
+#ifdef HAVE_NAMESPACES
+namespace DJVU {
+# ifdef NOT_DEFINED // Just to fool emacs c++ mode
+}
+#endif
+#endif
+
+
+class ByteStream;
+
+// -------- DJVUTXT --------
+
+/** Description of the text contained in a DjVu page. This class contains the
+ textual data for the page. It describes the text as a hierarchy of zones
+ corresponding to page, column, region, paragraph, lines, words, etc...
+ The piece of text associated with each zone is represented by an offset
+ and a length describing a segment of a global UTF8 encoded string. */
+
+class DjVuTXT : public GPEnabled
+{
+protected:
+ DjVuTXT(void) {}
+public:
+ /// Default creator
+ static GP<DjVuTXT> create(void) {return new DjVuTXT();}
+
+ /** These constants are used to tell what a zone describes.
+ This can be useful for a copy/paste application.
+ The deeper we go into the hierarchy, the higher the constant. */
+ enum ZoneType { PAGE=1, COLUMN=2, REGION=3, PARAGRAPH=4,
+ LINE=5, WORD=6, CHARACTER=7 };
+ /** Data structure representing document textual components.
+ The text structure is represented by a hierarchy of rectangular zones. */
+ class Zone
+ {
+ public:
+ Zone();
+ /** Type of the zone. */
+ enum ZoneType ztype;
+ /** Rectangle spanned by the zone */
+ GRect rect;
+ /** Position of the zone text in string #textUTF8#. */
+ int text_start;
+ /** Length of the zone text in string #textUTF8#. */
+ int text_length;
+ /** List of children zone. */
+ GList<Zone> children;
+ /** Appends another subzone inside this zone. The new zone is initialized
+ with an empty rectangle, empty text, and has the same type as this
+ zone. */
+ Zone *append_child();
+ /** Find the text_start and text_end indicated by the given box. */
+ void get_text_with_rect(const GRect &box,
+ int &string_start,int &string_end ) const;
+ /** Find the zones used by the specified string and append them to the list. */
+ void find_zones(GList<Zone *> &list,
+ const int string_start, const int string_end) const;
+ /** Finds the smallest rectangles and appends them to the list. */
+ void get_smallest(GList<GRect> &list) const;
+ /** Finds the smallest rectangles and appends them to the list after
+ padding the smallest unit to fit width or height for the parent rectangle
+ and adding the number of specified pixels. */
+ void get_smallest(GList<GRect> &list,const int padding) const;
+ /// Find out this Zone's parent.
+ const Zone *get_parent(void) const;
+ private:
+ friend class DjVuTXT;
+ const Zone *zone_parent;
+ void cleartext();
+ void normtext(const char *instr, GUTF8String &outstr);
+ unsigned int memuse() const;
+ static const int version;
+ void encode(const GP<ByteStream> &bs,
+ const Zone * parent=0, const Zone * prev=0) const;
+ void decode(const GP<ByteStream> &bs, int maxtext,
+ const Zone * parent=0, const Zone * prev=0);
+ };
+ /** Textual data for this page.
+ The content of this string is encoded using the UTF8 code.
+ This code corresponds to ASCII for the first 127 characters.
+ Columns, regions, paragraph and lines are delimited by the following
+ control character:
+ \begin{tabular}{lll}
+ {\bf Name} & {\bf Octal} & {\bf Ascii name} \\\hline\\
+ {\tt DjVuText::end_of_column} & 013 & VT, Vertical Tab \\
+ {\tt DjVuText::end_of_region} & 035 & GS, Group Separator \\
+ {\tt DjVuText::end_of_paragraph} & 037 & US, Unit Separator \\
+ {\tt DjVuText::end_of_line} & 012 & LF: Line Feed
+ \end{tabular} */
+ GUTF8String textUTF8;
+ static const char end_of_column ; // VT: Vertical Tab
+ static const char end_of_region ; // GS: Group Separator
+ static const char end_of_paragraph ; // US: Unit Separator
+ static const char end_of_line ; // LF: Line Feed
+ /** Main zone in the document.
+ This zone represent the page. */
+ Zone page_zone;
+ /** Tests whether there is a meaningful zone hierarchy. */
+ int has_valid_zones() const;
+ /** Normalize textual data. Assuming that a zone hierarchy has been built
+ and represents the reading order. This function reorganizes the string
+ #textUTF8# by gathering the highest level text available in the zone
+ hierarchy. The text offsets and lengths are recomputed for all the
+ zones in the hierarchy. Separators are inserted where appropriate. */
+ void normalize_text();
+ /** Encode data for a TXT chunk. */
+ void encode(const GP<ByteStream> &bs) const;
+ /** Decode data from a TXT chunk. */
+ void decode(const GP<ByteStream> &bs);
+ /** Returns a copy of this object. */
+ GP<DjVuTXT> copy(void) const;
+ /// Write XML formated text.
+ void writeText(ByteStream &bs,const int height) const;
+ /// Get XML formatted text.
+ GUTF8String get_xmlText(const int height) const;
+ /** Find the text specified by the rectangle. */
+ GList<Zone*> find_text_in_rect(GRect target_rect, GUTF8String &text) const;
+ /** Find the text specified by the rectangle. */
+ GList<GRect> find_text_with_rect(const GRect &box, GUTF8String &text, const int padding=0) const;
+ /** Get all zones of zone type zone_type under node parent.
+ zone_list contains the return value. */
+ void get_zones(int zone_type, const Zone *parent, GList<Zone *> & zone_list) const;
+ /** Returns the number of bytes needed by this data structure. It's
+ used by caching routines to estimate the size of a \Ref{DjVuImage}. */
+ unsigned int get_memory_usage() const;
+};
+
+inline const DjVuTXT::Zone *
+DjVuTXT::Zone::get_parent(void) const
+{
+ return zone_parent;
+}
+
+
+class DjVuText : public GPEnabled
+{
+protected:
+ DjVuText(void) {}
+public:
+ /// Default creator.
+ static GP<DjVuText> create(void) {return new DjVuText();}
+
+ /** Decodes a sequence of annotation chunks and merges contents of every
+ chunk with previously decoded information. This function
+ should be called right after applying \Ref{IFFByteStream::get_chunk}()
+ to data from #FORM:TEXT#. */
+ void decode(const GP<ByteStream> &bs);
+
+ /** Encodes all annotations back into a sequence of chunks to be put
+ inside a #FORM:TEXT#. */
+ void encode(const GP<ByteStream> &bs);
+
+ /// Returns a copy of this object
+ GP<DjVuText> copy(void) const;
+
+ /** Returns the number of bytes needed by this data structure. It's
+ used by caching routines to estimate the size of a \Ref{DjVuImage}. */
+ inline unsigned int get_memory_usage() const;
+
+ /// Write XML formated text.
+ void writeText(ByteStream &bs,const int height) const;
+
+ /// Get XML formatted text.
+ GUTF8String get_xmlText(const int height) const;
+
+ GP<DjVuTXT> txt;
+private: // dummy stuff
+ static void decode(ByteStream *);
+ static void encode(ByteStream *);
+};
+
+//@}
+
+inline unsigned int
+DjVuText::get_memory_usage() const
+{
+ return (txt)?(txt->get_memory_usage()):0;
+}
+
+
+// ----- THE END
+
+#ifdef HAVE_NAMESPACES
+}
+# ifndef NOT_USING_DJVU_NAMESPACE
+using namespace DJVU;
+# endif
+#endif
+#endif
+
+