diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htnet/HtFile.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htnet/HtFile.cc | 341 |
1 files changed, 341 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htnet/HtFile.cc b/debian/htdig/htdig-3.2.0b6/htnet/HtFile.cc new file mode 100644 index 00000000..45f1632e --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htnet/HtFile.cc @@ -0,0 +1,341 @@ +// +// HtFile.cc +// +// HtFile: Interface classes for retriving local documents +// +// Including: +// - Generic class +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1995-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: HtFile.cc,v 1.13 2004/05/28 13:15:23 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include "lib.h" +#include "Transport.h" +#include "HtFile.h" +#include "Dictionary.h" +#include "StringList.h" +#include "defaults.h" // for config + +#include <signal.h> +#include <sys/types.h> +#include <ctype.h> + +#ifdef HAVE_STD +#include <iostream> +#include <fstream> +#ifdef HAVE_NAMESPACES +using namespace std; +#endif +#else +#include <iostream.h> +#include <fstream.h> +#endif /* HAVE_STD */ + +#include <stdio.h> // for sscanf +#include <sys/stat.h> + +#ifndef _MSC_VER /* _WIN32 */ +#include <unistd.h> +#endif + +#ifdef _MSC_VER /* _WIN32 */ +#include "dirent_local.h" +#else +#include <dirent.h> // for scandir +#endif + +#ifdef _MSC_VER /* _WIN32 */ +#define popen _popen +#define pclose _pclose +#define lstat stat +#define readlink(x,y,z) {-1} +#endif + + +/////// + // HtFile_Response class +/////// + + +// Construction + +HtFile_Response::HtFile_Response() +{ +} + + +// Destruction + +HtFile_Response::~HtFile_Response() +{ +} + +/////// + // HtFile generic class + // + // +/////// + + +// Construction + +HtFile::HtFile() +{ +} + +// Destruction + +HtFile::~HtFile() +{ + // It's empty +} + + +// Return mime type indicated by extension ext (which is assumed not +// to contain the '.'), or NULL if ext is not a know mime type. +const String *HtFile::Ext2Mime (const char *ext) +{ + static Dictionary *mime_map = 0; + + if (!mime_map) + { + HtConfiguration* config= HtConfiguration::config(); + mime_map = new Dictionary(); + if (!mime_map) + return NULL; + + if (debug > 2) + cout << "MIME types: " << config->Find("mime_types").get() << endl; + ifstream in(config->Find("mime_types").get()); + if (in) + { + String line; + while (in >> line) + { + line.chop("\n\r \t"); + int cmt; + if ((cmt = line.indexOf('#')) >= 0) + line = line.sub(0, cmt); + StringList split_line(line, "\t "); + // Let's cache mime type to lesser the number of + // operator [] callings + String mime_type = split_line[0]; + // Fill map with values. + for (int i = 1; i < split_line.Count(); i++) + { + if (debug > 3) + cout << "MIME: " << split_line[i] + << "\t-> " << mime_type << endl; + mime_map->Add(split_line[i], new String(mime_type)); + } + } + } + else + { + if (debug > 2) + cout << "MIME types file not found. Using default types.\n"; + mime_map->Add(String("html"), new String("text/html")); + mime_map->Add(String("htm"), new String("text/html")); + mime_map->Add(String("txt"), new String("text/plain")); + mime_map->Add(String("asc"), new String("text/plain")); + mime_map->Add(String("pdf"), new String("application/pdf")); + mime_map->Add(String("ps"), new String("application/postscript")); + mime_map->Add(String("eps"), new String("application/postscript")); + } + } + + // return MIME type, or NULL if not found + return (String *)mime_map->Find(ext); +} + +// Return mime type of the file named 'fname'. +// If the type can't be determined, "application/x-unknown" is returned. +String HtFile::File2Mime (const char *fname) +{ + HtConfiguration* config= HtConfiguration::config(); + + // default to "can't identify" + char content_type [100] = "application/x-unknown\n"; + + String cmd = config->Find ("content_classifier"); + if (cmd.get() && *cmd) + { + cmd << " \"" << fname << '\"'; // allow file names to have spaces + FILE *fileptr; + if ( (fileptr = popen (cmd.get(), "r")) != NULL ) + { + fgets (content_type, sizeof (content_type), fileptr); + pclose (fileptr); + } + } + + // Remove trailing newline, charset or language information + int delim = strcspn (content_type, ",; \n\t"); + content_type [delim] = '\0'; + + if (debug > 1) + cout << "Mime type: " << fname << ' ' << content_type << endl; + return (String (content_type)); +} + +/////// + // Manages the requesting process +/////// + +HtFile::DocStatus HtFile::Request() +{ + // Reset the response + _response.Reset(); + + struct stat stat_buf; + + String path (_url.path()); + decodeURL (path); // Convert '%20' to ' ' etc + + // Check that it exists, and is a regular file or directory + // Don't allow symbolic links to directories; they mess up '../'. + // Should we allow FIFO's? + if ( stat(path.get(), &stat_buf) != 0 || + !(S_ISREG(stat_buf.st_mode) || S_ISDIR(stat_buf.st_mode)) ) + { + return Transport::Document_not_found; + } + + // Now handle directories with a pseudo-HTML document (and appropriate noindex) + if ( S_ISDIR(stat_buf.st_mode) ) + { + _response._content_type = "text/html"; + _response._contents = "<html><head><meta name=\"robots\" content=\"noindex\">\n"; + + struct dirent *namelist; + DIR *dirList; + String filename; + String encodedName; + + if (( dirList = opendir(path.get()) )) + { + while (( namelist = readdir(dirList) )) + { + filename = path; + filename << namelist->d_name; + + if ( namelist->d_name[0] != '.' + && lstat(filename.get(), &stat_buf) == 0 ) + { + // Recursively resolve symbolic links. + // Could leave "absolute" links, or even all not + // containing '../'. That would allow "aliasing" of + // directories without causing loops. + + int i; // avoid infinite loops + for (i=0; (stat_buf.st_mode & S_IFMT) == S_IFLNK && i<10; i++) + { + char link [100]; + int count = readlink(filename.get(), link, sizeof(link)-1); + + if (count < 0) + break; + link [count] = '\0'; + encodedName = link; + encodeURL (encodedName); + URL newURL (encodedName, _url); // resolve relative paths + filename = newURL.path(); + decodeURL (filename); + if (debug > 2) + cout << "Link to " << link << " gives " + << filename.get() << endl; + lstat(filename.get(), &stat_buf); + } + // filename now only sym-link if nested too deeply or I/O err. + + encodeURL (filename, UNRESERVED "/"); // convert ' ' to '%20' etc., but leave "/" intact + if (S_ISDIR(stat_buf.st_mode)) + _response._contents << "<link href=\"file://" + << filename.get() << "/\">\n"; + else if (S_ISREG(stat_buf.st_mode)) + _response._contents << "<link href=\"file://" + << filename.get() << "\">\n"; + } + } + closedir(dirList); + } + + _response._contents << "</head><body></body></html>\n"; + + if (debug > 4) + cout << " Directory listing: " << endl << _response._contents << endl; + + _response._content_length = stat_buf.st_size; + _response._document_length = _response._contents.length(); + _response._modification_time = new HtDateTime(stat_buf.st_mtime); + _response._status_code = 0; + return Transport::Document_ok; + } + + if (_modification_time && *_modification_time >= HtDateTime(stat_buf.st_mtime)) + return Transport::Document_not_changed; + + bool unknown_ext = false; + char *ext = strrchr(path.get(), '.'); + if (ext == NULL) + unknown_ext = true; + else + { + const String *mime_type = Ext2Mime(ext + 1); + if (mime_type) + _response._content_type = *mime_type; + else + unknown_ext = true; + } + if (unknown_ext) + { + _response._content_type = File2Mime (path.get()); + if (!strncmp (_response._content_type.get(), "application/x-", 14)) + return Transport::Document_not_local; + } + + _response._modification_time = new HtDateTime(stat_buf.st_mtime); + + FILE *f = fopen((const char *)path.get(), "r"); + if (f == NULL) + return Document_not_found; + + char docBuffer[8192]; + int bytesRead; + while ((bytesRead = fread(docBuffer, 1, sizeof(docBuffer), f)) > 0) + { + if (_response._contents.length() + bytesRead > _max_document_size) + bytesRead = _max_document_size - _response._contents.length(); + _response._contents.append(docBuffer, bytesRead); + if (_response._contents.length() >= _max_document_size) + break; + } + fclose(f); + + _response._content_length = stat_buf.st_size; + _response._document_length = _response._contents.length(); + _response._status_code = 0; + + if (debug > 2) + cout << "Read a total of " << _response._document_length << " bytes\n"; + return Transport::Document_ok; +} + +HtFile::DocStatus HtFile::GetDocumentStatus() +{ + // Let's give a look at the return status code + if (_response._status_code == -1) + return Transport::Document_not_found; + return Transport::Document_ok; +} + |