summaryrefslogtreecommitdiffstats
path: root/lib/libchmfile/libchmfile.h
blob: ab129b5ca6d4deb2a29a505e53ef21ac2e3b308c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
/***************************************************************************
 *   Copyright (C) 2004-2007 by Georgy Yunaev, gyunaev@ulduzsoft.com       *
 *   Please do not use email address above for bug reports; see            *
 *   the README file                                                       *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.             *
 ***************************************************************************/

#ifndef INCLUDE_LIBCHMFILE_H
#define INCLUDE_LIBCHMFILE_H

#include <qstring.h>
#include <qcstring.h>
#include <qlistview.h>
#include <qlistbox.h>
#include <qmap.h>
#include <qvaluevector.h>
#include <qtextcodec.h> 

#include "libchmtextencoding.h"

// Qt3/Qt4 container shim: Qt3's QVector keeps pointers rather than values, so the
// value-based QValueVector has to be used there. Qt4's QVector keeps values, so it
// can be used directly. QT34VECTOR expands to whichever template fits the build.
#ifdef USE_QT_4
#define QT34VECTOR QVector
#else
#define QT34VECTOR QValueVector
#endif


//! Contains different (non-standard) image types.
//! NOTE(review): the negative values look like sentinels kept apart from regular
//! icon numbers - confirm against the code that consumes them.
namespace LCHMBookIcons
{
	//! Presumably "no image for this entry" - TODO confirm with the users of this constant.
	const int IMAGE_NONE = -1;

	//! Presumably "pick the image automatically" - TODO confirm with the users of this constant.
	const int IMAGE_AUTO = -2;

	//! Presumably the image used for index entries - TODO confirm with the users of this constant.
	const int IMAGE_INDEX = -3;

	//! Number of built-in icons, going by the name - TODO confirm how the bound is applied.
	const int MAX_BUILTIN_ICONS = 42;
}


//! Contains a single index or TOC entry. See LCHMFile::parseTableOfContents() and LCHMFile::parseIndex()
//! Declared as a named struct (not an anonymous typedef'd struct) so the type can be forward-declared.
struct LCHMParsedEntry
{
	//! Entry name
	QString		name;
	
	//! Entry URLs. The TOC entry should have only one URL; the index entry could have several.
	QStringList	urls;
	
	//! Associated image number. Used for TOC only; index entries do not have an image.
	//! Use LCHMFile::getBookIconPixmap() to get the associated pixmap icon.
	int			imageid;
	
	//! Indentation level for this entry.
	int			indent;

};


// Implementation class; only its name is needed in this header.
class LCHMFileImpl;

//! Processor for CHM files, heavily based on chmlib. The search code comes from xchm.
class LCHMFile
{
public:
	//! Default constructor.
	LCHMFile();

	//! Destructor.
	~LCHMFile();

	/*!
	 * \brief Attempts to load a .chm file.
	 * \param archiveName The .chm filename.
	 * \return true on success, false on failure.
	 *
	 * Loads a CHM file. May internally load more than one file if files linked to
	 * this one are present locally (like MSDN).
	 * \ingroup init
	 */
	bool loadFile( const QString& archiveName );

	/*!
	 * \brief Closes all the files and frees the appropriate data.
	 * \ingroup init
	 */
	void closeAll();

	/*!
	 * \brief Gets the title of the opened .chm.
	 * \return The name of the opened document, or an empty string if no .chm has been loaded.
	 * \ingroup information
	 */
	QString title() const;

	/*!
	 * \brief Gets the URL of the default page in the chm archive.
	 * \return The home page name, with a '/' added in front and relative to
	 *         the root of the archive filesystem. If no .chm has been opened,
	 *         returns "/".
	 * \ingroup information
	 */
	QString homeUrl() const;

	/*!
	 * \brief Checks whether the Table of Contents is present in this file.
	 * \return true if it is available; false otherwise.
	 * \ingroup information
	 */
	bool hasTableOfContents() const;

	/*!
	 * \brief Checks whether the Index Table is present in this file.
	 * \return true if it is available; false otherwise.
	 * \ingroup information
	 */
	bool hasIndexTable() const;

	/*!
	 * \brief Checks whether the Search Table is available in this file.
	 * \return true if it is available; false otherwise.
	 * \ingroup information
	 *
	 * Without the search table, searching is not possible.
	 */
	bool hasSearchTable() const;

	/*!
	 * \brief Parses the Table of Contents (TOC).
	 * \param topics A pointer to the container which will receive the parsed results.
	 *               It is cleaned before parsing.
	 * \return true if the tree is present and parsed successfully, false otherwise.
	 *         The parser is built to be error-tolerant, however it can still abort with qFatal()
	 *         on a really buggy chm file; please report a bug if the file opens fine under Windows.
	 * \ingroup fileparsing
	 */
	bool parseTableOfContents( QT34VECTOR< LCHMParsedEntry > * topics ) const;

	/*!
	 * \brief Parses the Index Table.
	 * \param indexes A pointer to the container which will receive the parsed results.
	 *                It is cleaned before parsing.
	 * \return true if the tree is present and parsed successfully, false otherwise.
	 *         The parser is built to be error-tolerant, however it can still abort with qFatal()
	 *         on a really buggy chm file; so far this has never happened on indexes.
	 * \ingroup fileparsing
	 */
	bool parseIndex( QT34VECTOR< LCHMParsedEntry > * indexes ) const;

	/*!
	 * \brief Retrieves the content from url in the current chm file into a QString.
	 * \param str A string where the retrieved content should be stored.
	 * \param url A URL in the chm file to retrieve content from. Must be absolute.
	 * \return true if the content is successfully received; false otherwise.
	 *
	 * Retrieves the file content (mostly for HTML pages) from the chm archive opened by
	 * loadFile(). Because the content in a chm file is not stored in Unicode, it is
	 * recoded according to the current encoding. Do not use for binary data.
	 *
	 * \sa setCurrentEncoding() currentEncoding() getFileContentAsBinary()
	 * \ingroup dataretrieve
	 */
	bool getFileContentAsString( QString * str, const QString& url );

	/*!
	 * \brief Retrieves the content from url in the current chm file into a QByteArray.
	 * \param data A data array where the retrieved content should be stored.
	 * \param url A URL in the chm file to retrieve content from. Must be absolute.
	 * \return true if the content is successfully received; false otherwise.
	 *
	 * Retrieves the file content from the chm archive opened by loadFile().
	 * The content is not encoded.
	 *
	 * \sa getFileContentAsString()
	 * \ingroup dataretrieve
	 */
	bool getFileContentAsBinary( QByteArray * data, const QString& url );

	/*!
	 * \brief Retrieves the content size.
	 * \param size A pointer where the size will be stored.
	 * \param url A URL in the chm file whose content size is requested. Must be absolute.
	 * \return true if the content size is successfully stored; false otherwise.
	 *
	 * \ingroup dataretrieve
	 */
	bool getFileSize( unsigned int * size, const QString& url );

	/*!
	 * \brief Obtains the list of all the files in the current chm file archive.
	 * \param files An array to store the list of URLs (file names) present in the chm archive.
	 * \return true if the enumeration succeeded; false otherwise (a failure is hard to imagine).
	 *
	 * \ingroup dataretrieve
	 */
	bool enumerateFiles( QStringList * files );

	/*!
	 * \brief Gets the title of the HTML page referenced by url.
	 * \param url A URL in the chm file to get the title from. Must be absolute.
	 * \return The title, or QString::null if the URL cannot be found or is not an HTML page.
	 *
	 * \ingroup dataretrieve
	 */
	QString getTopicByUrl( const QString& url );

	/*!
	 * \brief Gets the appropriate CHM pixmap icon.
	 * \param imagenum The image number from the TOC.
	 * \return The pixmap to show in the TOC tree.
	 *
	 * \ingroup dataretrieve
	 */
	const QPixmap * getBookIconPixmap( unsigned int imagenum );

	/*!
	 * \brief Normalizes the URL: converts relatives, adds "/" in front and removes ".."
	 * \param url The URL to normalize.
	 * \return The normalized, cleaned up URL.
	 *
	 * \ingroup dataretrieve
	 */
	QString normalizeUrl( const QString& url ) const;

	/*!
	 * \brief Gets the current CHM archive encoding (set or autodetected).
	 * \return The current encoding.
	 *
	 * \ingroup encoding
	 */
	const LCHMTextEncoding * currentEncoding() const;

	/*!
	 * \brief Sets the CHM archive encoding to use.
	 * \param encoding An encoding to use.
	 * \return NOTE(review): the meaning of the result is not documented here;
	 *         presumably true when the encoding was applied - confirm in the implementation.
	 *
	 * \ingroup encoding
	 */
	bool setCurrentEncoding( const LCHMTextEncoding * encoding );

	/*!
	 * \brief Executes a search query and returns the results.
	 * \param query A search query.
	 * \param results An array to store the URLs where the query was found.
	 * \param limit Defaults to 100. NOTE(review): presumably the maximum number of
	 *              results to return - confirm in the implementation.
	 * \return true if the search was successful (this does not mean that it returned any results);
	 *         false otherwise.
	 *
	 * Executes a standard search query. The query should consist of one or more
	 * words separated by a space, each with a possible prefix. A prefix may be:
	 *   +   Plus indicates that the word is required; any page without this word is excluded
	 *       from the result.
	 *   -   Minus indicates that the word is required to be absent; any page with this word is
	 *       excluded from the result.
	 *   "." Quotes indicate a phrase. Anything between quotes is a phrase, which is a set of
	 *       space-separated words. A page is in the result only if the words of the phrase
	 *       appear in the page in the same sequence, following each other.
	 *
	 *   If there is no prefix, the word is considered required.
	 * \ingroup search
	 */
	bool searchQuery( const QString& query, QStringList * results, unsigned int limit = 100 );

	//! Access to the implementation object.
	LCHMFileImpl * impl()
	{
		return m_impl;
	}

private:
	//! Copy construction is not allowed; the copy constructor is declared private.
	LCHMFile( const LCHMFile& );

	//! Assignment is not allowed; the assignment operator is declared private.
	LCHMFile& operator=( const LCHMFile& );

	//! Pointer to the implementation.
	LCHMFileImpl * m_impl;
};


#endif // INCLUDE_LIBCHMFILE_H