1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
/*
* Copyright (C) 2008-2012 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include "HtmlReaderStream.h"
#include "HtmlReader.h"
// HtmlReader subclass that gathers the character data of an HTML document
// into a fixed-size, caller-supplied buffer. Text seen while a SCRIPT tag is
// open is dropped, and a newline is written at tag boundaries (see
// tagHandler). The buffer is not owned by this class.
class HtmlTextOnlyReader : public HtmlReader {

public:
    // buffer  - destination for the extracted text; must hold maxSize bytes.
    // maxSize - capacity of buffer in bytes.
    HtmlTextOnlyReader(char *buffer, std::size_t maxSize);

    // Number of bytes written into the buffer so far.
    std::size_t size() const;

private:
    // Handlers invoked by the HtmlReader base class during parsing.
    void startDocumentHandler();
    void endDocumentHandler();
    bool tagHandler(const HtmlTag &tag);
    bool characterDataHandler(const char *text, std::size_t len, bool convert);

    char *myBuffer;            // output buffer (not owned)
    std::size_t myMaxSize;     // capacity of myBuffer
    std::size_t myFilledSize;  // bytes of myBuffer already used
    bool myIgnoreText;         // true while character data must be skipped
};
// Binds the reader to the caller's output buffer. The base HtmlReader is
// constructed with an empty string; the buffer starts empty and text is
// not ignored initially.
HtmlTextOnlyReader::HtmlTextOnlyReader(char *buffer, std::size_t maxSize)
    : HtmlReader(std::string()),
      myBuffer(buffer),
      myMaxSize(maxSize),
      myFilledSize(0),
      myIgnoreText(false) {
}
// Returns how many bytes of the output buffer have been filled so far.
std::size_t HtmlTextOnlyReader::size() const {
    return myFilledSize;
}
// Start-of-document hook: intentionally does nothing.
void HtmlTextOnlyReader::startDocumentHandler() {
}
// End-of-document hook: intentionally does nothing.
void HtmlTextOnlyReader::endDocumentHandler() {
}
// Tag hook.
//  - A SCRIPT tag switches text skipping on (opening tag) or off (closing
//    tag), following tag.Start.
//  - Any tag terminates the current line: if the buffer is non-empty, has
//    room, and does not already end with '\n', one '\n' is appended.
// Returns true while the buffer still has free space (presumably the
// parser's "keep going" signal -- confirm against HtmlReader).
bool HtmlTextOnlyReader::tagHandler(const HtmlTag &tag) {
    if (tag.Name == "SCRIPT") {
        myIgnoreText = tag.Start;
    }

    if (myFilledSize < myMaxSize) {
        if (myFilledSize > 0) {
            const char lastChar = myBuffer[myFilledSize - 1];
            if (lastChar != '\n') {
                myBuffer[myFilledSize] = '\n';
                ++myFilledSize;
            }
        }
    }

    return myFilledSize < myMaxSize;
}
// Character-data hook: appends up to len bytes of text to the buffer,
// truncating to the space that remains. Nothing is copied while text is
// being ignored. The third (conversion) flag is unused.
// Returns true while the buffer still has free space.
bool HtmlTextOnlyReader::characterDataHandler(const char *text, std::size_t len, bool) {
    if (myIgnoreText) {
        return myFilledSize < myMaxSize;
    }

    // Never write past the end of the caller's buffer.
    const std::size_t room = myMaxSize - myFilledSize;
    const std::size_t count = (room < len) ? room : len;

    std::memcpy(myBuffer + myFilledSize, text, count);
    myFilledSize += count;

    return myFilledSize < myMaxSize;
}
// Creates a stream that exposes the plain text of the HTML document read
// from base.
//   base    - underlying stream holding the HTML source.
//   maxSize - capacity (in bytes) of the text buffer allocated by open().
// No buffer is allocated until open() is called.
HtmlReaderStream::HtmlReaderStream(shared_ptr<ZLInputStream> base, std::size_t maxSize) : myBase(base), myBuffer(0), mySize(maxSize) {
    // Give the read position a defined value so that offset()/seek() on a
    // stream that was never opened do not read an indeterminate member.
    // Assigned here rather than in the initializer list because the member
    // declaration order is defined in the header.
    myOffset = 0;
}
// Destructor: releases the text buffer (if any) by closing the stream.
HtmlReaderStream::~HtmlReaderStream() {
    close();
}
// Opens the stream: reads the whole base stream through HtmlTextOnlyReader
// into a freshly allocated buffer of mySize bytes, then closes the base
// stream. Afterwards mySize holds the number of text bytes actually
// produced and the read position is reset to 0.
//
// Returns false (and changes nothing) when there is no base stream or it
// cannot be opened; true otherwise.
//
// NOTE(review): mySize is overwritten with the extracted length, so a later
// re-open allocates only that many bytes rather than the original maxSize.
bool HtmlReaderStream::open() {
    if (myBase.isNull() || !myBase->open()) {
        return false;
    }

    // Release a buffer left over from a previous open() that was not
    // followed by close(); otherwise it would be leaked by the allocation
    // below. delete[] on a null pointer is a no-op.
    delete[] myBuffer;
    myBuffer = 0;

    myBuffer = new char[mySize];
    HtmlTextOnlyReader reader(myBuffer, mySize);
    reader.readDocument(*myBase);
    mySize = reader.size();
    myOffset = 0;
    myBase->close();
    return true;
}
// Reads up to maxSize bytes of extracted text starting at the current
// position and advances the position by the number of bytes consumed.
//   buffer  - destination; may be null, in which case the bytes are
//             skipped without being copied.
//   maxSize - maximum number of bytes to read.
// Returns the number of bytes consumed (0 at end of data or when the stream
// has no buffer, i.e. it is not open).
std::size_t HtmlReaderStream::read(char *buffer, std::size_t maxSize) {
    if (myBuffer == 0) {
        // Not open (or already closed): there is nothing to read from.
        return 0;
    }
    maxSize = std::min(maxSize, mySize - myOffset);
    if (buffer != 0) {
        // Copy from the current read position, not from the start of the
        // buffer; otherwise every call would return the beginning of the text.
        std::memcpy(buffer, myBuffer + myOffset, maxSize);
    }
    myOffset += maxSize;
    return maxSize;
}
// Frees the text buffer and marks the stream as having none. Safe to call
// repeatedly: delete[] on a null pointer does nothing.
void HtmlReaderStream::close() {
    delete[] myBuffer;
    myBuffer = 0;
}
// Moves the read position.
//   offset         - target position, or a delta when absoluteOffset is false.
//   absoluteOffset - true: offset is measured from the start of the text;
//                    false: offset is relative to the current position.
// The resulting position is clamped to the range [0, mySize].
void HtmlReaderStream::seek(int offset, bool absoluteOffset) {
    int target = offset;
    if (!absoluteOffset) {
        target += myOffset;
    }
    if (target < 0) {
        target = 0;
    }
    const std::size_t wanted = (std::size_t)target;
    myOffset = (wanted < mySize) ? wanted : mySize;
}
// Returns the current read position within the extracted text.
std::size_t HtmlReaderStream::offset() const {
    return myOffset;
}
// Returns mySize: after a successful open() this is the length of the
// extracted text; before that it is the capacity passed to the constructor.
std::size_t HtmlReaderStream::sizeOfOpened() {
    return mySize;
}
|