summaryrefslogtreecommitdiffstats
path: root/reader/src/formats/html/HtmlReaderStream.cpp
blob: 08c43aed689b9aab952387eb4bafb3cf6026ef48 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/*
 * Copyright (C) 2008-2012 Geometer Plus <contact@geometerplus.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

#include <cstdlib>
#include <cstring>
#include <algorithm>

#include "HtmlReaderStream.h"
#include "HtmlReader.h"

// HtmlReader specialisation that keeps only character data: text is copied
// into a caller-supplied, fixed-capacity buffer and a line break is inserted
// wherever a tag is encountered.
class HtmlTextOnlyReader : public HtmlReader {

public:
	// buffer  - destination for the extracted text; this class never frees it.
	// maxSize - capacity of buffer in bytes; nothing is written past it.
	HtmlTextOnlyReader(char *buffer, std::size_t maxSize);

	// Number of bytes written into the buffer so far.
	std::size_t size() const;

private:
	// Parser callbacks (presumably invoked by HtmlReader while it reads a
	// document - confirm against HtmlReader.h).
	void startDocumentHandler();
	void endDocumentHandler();
	bool tagHandler(const HtmlTag &tag);
	bool characterDataHandler(const char *text, std::size_t len, bool convert);

	char *myBuffer;            // output buffer supplied to the constructor
	std::size_t myMaxSize;     // capacity of myBuffer
	std::size_t myFilledSize;  // bytes of myBuffer already in use
	bool myIgnoreText;         // Start flag of the last SCRIPT tag seen; true suppresses text
};

// Binds the reader to an output buffer of maxSize bytes. The base HtmlReader
// is constructed from an empty string, the fill counter starts at zero and
// text is not being ignored initially.
HtmlTextOnlyReader::HtmlTextOnlyReader(char *buffer, std::size_t maxSize) : HtmlReader(std::string()), myBuffer(buffer), myMaxSize(maxSize), myFilledSize(0), myIgnoreText(false) {
}

// Returns how many bytes have been written into the output buffer so far.
std::size_t HtmlTextOnlyReader::size() const {
	return myFilledSize;
}

// Intentionally empty: no work is done at the start of a document.
void HtmlTextOnlyReader::startDocumentHandler() {
}

// Intentionally empty: no work is done at the end of a document.
void HtmlTextOnlyReader::endDocumentHandler() {
}

// Handles one tag. A SCRIPT tag updates the text-suppression flag from the
// tag's Start field; any tag then terminates the current line of output
// (a single '\n' is appended unless the buffer is empty, full, or already
// ends with '\n').
// Returns true while the buffer still has free space.
bool HtmlTextOnlyReader::tagHandler(const HtmlTag &tag) {
	if (tag.Name == "SCRIPT") {
		myIgnoreText = tag.Start;
	}

	const bool hasRoom = myFilledSize < myMaxSize;
	if (hasRoom && myFilledSize != 0) {
		// Avoid producing runs of blank lines for consecutive tags.
		if (myBuffer[myFilledSize - 1] != '\n') {
			myBuffer[myFilledSize] = '\n';
			++myFilledSize;
		}
	}
	return myFilledSize < myMaxSize;
}

// Appends up to len bytes of text to the output buffer, truncating to the
// space that is left. Nothing is copied while text is being ignored.
// The third (unnamed) parameter is not used.
// Returns true while the buffer still has free space.
bool HtmlTextOnlyReader::characterDataHandler(const char *text, std::size_t len, bool) {
	if (myIgnoreText) {
		return myFilledSize < myMaxSize;
	}

	// Clamp the copy length to the remaining capacity.
	const std::size_t room = myMaxSize - myFilledSize;
	const std::size_t count = (room < len) ? room : len;

	std::memcpy(myBuffer + myFilledSize, text, count);
	myFilledSize += count;
	return myFilledSize < myMaxSize;
}

// Creates a stream over the text extracted from the HTML provided by base.
// maxSize is stored as the buffer size used by open(); no buffer is
// allocated here.
HtmlReaderStream::HtmlReaderStream(shared_ptr<ZLInputStream> base, std::size_t maxSize) : myBase(base), myBuffer(0), mySize(maxSize) {
	// Give the read position a defined value so that offset() and a relative
	// seek() do not read an indeterminate value before the first open().
	myOffset = 0;
}

// Releases the text buffer, if one is still allocated, by calling close().
HtmlReaderStream::~HtmlReaderStream() {
	close();
}

bool HtmlReaderStream::open() {
	if (myBase.isNull() || !myBase->open()) {
		return false;
	}
	myBuffer = new char[mySize];
	HtmlTextOnlyReader reader(myBuffer, mySize);
	reader.readDocument(*myBase);
	mySize = reader.size();
	myOffset = 0;
	myBase->close();
	return true;
}

// Reads up to maxSize bytes of extracted text starting at the current read
// position and advances the position by the number of bytes consumed.
// buffer may be 0, in which case the bytes are skipped without being copied.
// Returns the number of bytes consumed; 0 if no text buffer is allocated.
std::size_t HtmlReaderStream::read(char *buffer, std::size_t maxSize) {
	if (myBuffer == 0) {
		// Nothing to read from (not opened, or already closed).
		return 0;
	}
	maxSize = std::min(maxSize, mySize - myOffset);
	if (buffer != 0) {
		// Copy from the current position, not from the start of the text.
		std::memcpy(buffer, myBuffer + myOffset, maxSize);
	}
	myOffset += maxSize;
	return maxSize;
}

// Frees the text buffer and marks the stream as having none.
// Safe to call repeatedly.
void HtmlReaderStream::close() {
	if (myBuffer == 0) {
		return;
	}
	delete[] myBuffer;
	myBuffer = 0;
}

// Moves the read position. With absoluteOffset the new position is offset
// itself; otherwise offset is added to the current position. The result is
// clamped to the range [0, mySize].
void HtmlReaderStream::seek(int offset, bool absoluteOffset) {
	int target = offset;
	if (!absoluteOffset) {
		target += myOffset;
	}

	// Clamp below at zero first, then above at the end of the text.
	const std::size_t nonNegative = (target < 0) ? 0 : (std::size_t)target;
	myOffset = (nonNegative < mySize) ? nonNegative : mySize;
}

// Returns the current read position within the extracted text.
std::size_t HtmlReaderStream::offset() const {
	return myOffset;
}

// Returns mySize: after a successful open() this is the number of bytes of
// extracted text; before that it is the maxSize given to the constructor.
std::size_t HtmlReaderStream::sizeOfOpened() {
	return mySize;
}