diff options
author | Timothy Pearson <kb9vqf@pearsoncomputing.net> | 2014-10-01 20:48:00 -0500 |
---|---|---|
committer | Timothy Pearson <kb9vqf@pearsoncomputing.net> | 2014-10-01 20:48:00 -0500 |
commit | 8066e87c6ca6eb12af5c427b5fb73ccfe6c97d04 (patch) | |
tree | 75db2666173ee234a5279be82a47e362b76c85fa /tdeio | |
parent | 6d268f3c559ac6351666de4a362a0536ecc7c78a (diff) | |
download | tdelibs-8066e87c6ca6eb12af5c427b5fb73ccfe6c97d04.tar.gz tdelibs-8066e87c6ca6eb12af5c427b5fb73ccfe6c97d04.zip |
Use libmagic for mime type determination
This relates to Bug 656, Bug 661, and others
Diffstat (limited to 'tdeio')
-rw-r--r-- | tdeio/CMakeLists.txt | 2 | ||||
-rw-r--r-- | tdeio/Makefile.am | 3 | ||||
-rw-r--r-- | tdeio/magic | 1068 | ||||
-rw-r--r-- | tdeio/tdeio/CMakeLists.txt | 2 | ||||
-rw-r--r-- | tdeio/tdeio/kmimemagic.cpp | 2258 | ||||
-rw-r--r-- | tdeio/tdeio/kmimemagic.h | 4 |
6 files changed, 167 insertions, 3170 deletions
diff --git a/tdeio/CMakeLists.txt b/tdeio/CMakeLists.txt index dcbc86889..054da69f2 100644 --- a/tdeio/CMakeLists.txt +++ b/tdeio/CMakeLists.txt @@ -36,8 +36,6 @@ link_directories( ##### other data ################################ -install( FILES magic DESTINATION ${MIME_INSTALL_DIR} ) - install( FILES application.desktop kurifilterplugin.desktop kcomprfilter.desktop kscan.desktop kdatatool.desktop diff --git a/tdeio/Makefile.am b/tdeio/Makefile.am index aa23fcb5a..591261d94 100644 --- a/tdeio/Makefile.am +++ b/tdeio/Makefile.am @@ -34,13 +34,10 @@ libtdeio_la_LIBADD = kssl/libkssl.la tdeio/libtdeiocore.la \ ../tdewallet/client/libtdewalletclient.la \ $(LIBZ) $(LIBFAM) $(LIBVOLMGT) $(ACL_LIBS) $(LIB_QT) $(LIB_TDECORE) $(top_builddir)/dcop/libDCOP.la $(LIB_X11) -kde_mime_DATA = magic kde_servicetypes_DATA = application.desktop kurifilterplugin.desktop \ kcomprfilter.desktop kscan.desktop kdatatool.desktop \ tdefileplugin.desktop tdecmodule.desktop -EXTRA_DIST = $(kde_mime_DATA) - update_DATA = tdeioslave.upd update_SCRIPTS = useragent.pl proxytype.pl updatedir = $(kde_datadir)/tdeconf_update diff --git a/tdeio/magic b/tdeio/magic deleted file mode 100644 index dbee7abba..000000000 --- a/tdeio/magic +++ /dev/null @@ -1,1068 +0,0 @@ -# Magic data for KMimeMagic (originally for file(1) command) -# -# The format is 4-5 columns: -# Column #1: byte number to begin checking from, ">" indicates continuation -# Column #2: type of data to match -# Column #3: contents of data to match -# Column #4: MIME type of result - -#------------------------------------------------------------------------------ -# Localstuff: file(1) magic for locally observed files -# Add any locally observed files here. - -#------------------------------------------------------------------------------ -# end local stuff -#------------------------------------------------------------------------------ - -#------------------------------------------------------------------------------ -# audio: file(1) magic for sound formats (see also "iff") -# -# Jan Nicolai Langfeldt (janl@ifi.uio.no), Dan Quinlan (quinlan@yggdrasil.com), -# and others -# - -# Sun/NeXT audio data -0 string .snd ->12 belong 1 audio/basic ->12 belong 2 audio/basic ->12 belong 3 audio/basic ->12 belong 4 audio/basic ->12 belong 5 audio/basic ->12 belong 6 audio/basic ->12 belong 7 audio/basic - ->12 belong 23 audio/x-adpcm ->12 belong 24 audio/x-adpcm ->12 belong 25 audio/x-adpcm ->12 belong 26 audio/x-adpcm ->12 belong 27 audio/x-adpcm - -# DEC systems (e.g. DECstation 5000) use a variant of the Sun/NeXT format -# that uses little-endian encoding and has a different magic number -0 lelong 0x0064732E ->12 lelong 1 audio/x-adpcm ->12 lelong 2 audio/x-adpcm ->12 lelong 3 audio/x-adpcm ->12 lelong 4 audio/x-adpcm ->12 lelong 5 audio/x-adpcm ->12 lelong 6 audio/x-adpcm ->12 lelong 7 audio/x-adpcm -# compressed (G.721 ADPCM) ->12 lelong 23 audio/x-adpcm - -# Creative Labs AUDIO stuff -0 string MThd audio/x-midi -#0 string CTMF Creative Music (CMF) data -#0 string SBI SoundBlaster instrument data -#0 string Creative\ Voice\ File Creative Labs voice data - -# Real Audio (Magic .ra\0375) -0 belong 0x2e7261fd audio/vnd.rn-realaudio -0 string .RMF application/vnd.rn-realmedia - -# OGG files -# For theora at position 87, see bug #109598 -0 string OggS application/ogg ->28 string \x01vorbis audio/vorbis ->28 string fLaC audio/x-oggflac ->28 string \x80theora video/x-theora ->87 string \x80theora video/x-theora ->28 string Speex\ \ audio/x-speex ->29 string video video/x-ogm ->29 string FLAC audio/x-oggflac - -# FLAC files -0 string fLaC audio/x-flac - -# Musepack files -0 string MP+ audio/x-musepack - -# C64 PSID sound files -0 string PSID audio/prs.sid - - -#------------------------------------------------------------------------------ -# riff: file(1) magic for RIFF format -# See -# -# http://www.seanet.com/users/matts/riffmci/riffmci.htm -# - -# RIFF (little-endian) data -0 string RIFF -# RIFF MIDI format -#>8 string RMID audio/x-midi? -# Microsoft WAVE format (*.wav) ->8 string WAVE audio/x-wav ->>20 leshort 80 audio/mpeg ->>20 leshort 85 audio/x-mp3 -# Corel Draw Picture -#>8 string CDRA Corel Draw Picture -# AVI == Audio Video Interleave ->8 string AVI\040 video/x-msvideo - -# RIFF (big-endian) data -0 string RIFX -# RIFF MIDI format -#>8 string RMID \b, MIDI -# Microsoft WAVE format (*.wav) ->8 string WAVE audio/x-wav -# Corel Draw Picture -#>8 string CDRA \b, Corel Draw Picture -# AVI == Audio Video Interleave ->8 string AVI\040 video/x-msvideo - - -#------------------------------------------------------------------------------ -# iff: file(1) magic for Interchange File Format (see also "audio" & "images") -# -# Daniel Quinlan (quinlan@yggdrasil.com) -- IFF was designed by Electronic -# Arts for file interchange. It has also been used by Apple, SGI, and -# especially Commodore-Amiga. -# -# IFF files begin with an 8 byte FORM header, followed by a 4 character -# FORM type, which is followed by the first chunk in the FORM. - -0 string FORM ->8 string AIFF audio/x-aiff -# AIFF-C audio data ->8 string AIFC audio/x-aiff -# IFF/8SVX audio data ->8 string 8SVX audio/x-aiff -#>8 string SAMP \b, SAMP sampled audio -#>8 string DTYP \b, DTYP datatype description -#>8 string PTCH \b, PTCH binary patch -# image formats -#>8 string ILBMBMHD \b, ILBM interleaved image -# other formats -#>8 string FTXT \b, FTXT formatted text - -#------------------------------------------------------------------------------ -# KSysV stuff: logfiles and packages belonging to KSysV -# - -# KSysV logfiles -0 string KDE\ System\ V\ Init\ Editor text/x-ksysv-log - -# KSysV init packages -4 string KSysV ->15 byte >0x01 application/x-ksysv-package - -#------------------------------------------------------------------------------ -# c-lang: file(1) magic for C programs or various scripts -# - -# XPM icons (Greg Roelofs, newt@uchicago.edu) -# ideally should go into "images", but entries below would tag XPM as C source -0 string /*\ XPM image/x-xpm - -# this first will upset you if you're a PL/1 shop... (are there any left?) -# in which case rm it; ascmagic will catch real C programs -# C or REXX program text -#0 string /* text/x-c -# C++ program text -#0 string // text/x-c++ - -#------------------------------------------------------------------------------ -# commands: file(1) magic for various shells and interpreters -# -#0 string :\ shell archive or commands for antique kernel text -0 string #!/bin/sh application/x-shellscript -0 string #!\ /bin/sh application/x-shellscript -0 string #!/bin/csh application/x-shellscript -0 string #!\ /bin/csh application/x-shellscript -# korn shell magic, sent by George Wu, gwu@clyde.att.com -0 string #!/bin/ksh application/x-shellscript -0 string #!\ /bin/ksh application/x-shellscript -0 string #!/bin/zsh application/x-shellscript -0 string #!\ /bin/zsh application/x-shellscript -0 string #!/bin/tcsh application/x-shellscript -0 string #!\ /bin/tcsh application/x-shellscript -0 string #!/usr/local/tcsh application/x-shellscript -0 string #!\ /usr/local/tcsh application/x-shellscript -0 string #!/usr/local/bin/tcsh application/x-shellscript -0 string #!\ /usr/local/bin/tcsh application/x-shellscript -# /usr/bin paths for ksh, zsh and tcsh -0 string #!/usr/bin/ksh application/x-shellscript -0 string #!\ /usr/bin/ksh application/x-shellscript -0 string #!/usr/bin/zsh application/x-shellscript -0 string #!\ /usr/bin/zsh application/x-shellscript -0 string #!/usr/bin/tcsh application/x-shellscript -0 string #!\ /usr/bin/tcsh application/x-shellscript -# bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de) -0 string #!/bin/bash application/x-shellscript -0 string #!\ /bin/bash application/x-shellscript -0 string #!/usr/local/bin/bash application/x-shellscript -0 string #!\ /usr/local/bin/bash application/x-shellscript - -0 string #!\ /bin/env\ bash application/x-shellscript -0 string #!/bin/env\ bash application/x-shellscript -0 string #!\ /usr/bin/env\ bash application/x-shellscript -0 string #!/usr/bin/env\ bash application/x-shellscript - -# -0 string #!/bin/ash application/x-shellscript -0 string #!\ /bin/ash application/x-shellscript -# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson) -0 string #!/usr/local/bin/zsh application/x-shellscript -0 string #!\ /usr/local/bin/zsh application/x-shellscript -0 string #!/usr/local/bin/ash application/x-shellscript -0 string #!\ /usr/local/bin/ash application/x-shellscript -#0 string #!/usr/local/bin/ae Neil Brown's ae -#0 string #!\ /usr/local/bin/ae Neil Brown's ae -0 string #!/bin/nawk application/x-nawk -0 string #!\ /bin/nawk application/x-nawk -0 string #!/usr/bin/nawk application/x-nawk -0 string #!\ /usr/bin/nawk application/x-nawk -0 string #!/usr/local/bin/nawk application/x-nawk -0 string #!\ /usr/local/bin/nawk application/x-nawk -0 string #!/bin/gawk application/x-gawk -0 string #!\ /bin/gawk application/x-gawk -0 string #!/usr/bin/gawk application/x-gawk -0 string #!\ /usr/bin/gawk application/x-gawk -0 string #!/usr/local/bin/gawk application/x-gawk -0 string #!\ /usr/local/bin/gawk application/x-gawk -# -0 string #!/bin/awk application/x-awk -0 string #!\ /bin/awk application/x-awk -0 string #!/usr/bin/awk application/x-awk -0 string #!\ /usr/bin/awk application/x-awk -#0 string BEGIN application/x-awk - -# For Larry Wall's perl language. The ``eval'' line recognizes an -# outrageously clever hack for USG systems. -# Keith Waclena <keith@cerberus.uchicago.edu> -0 string #!/bin/perl application/x-perl -0 string #!\ /bin/perl application/x-perl -0 string eval\ "exec\ /bin/perl application/x-perl -0 string #!/usr/bin/perl application/x-perl -0 string #!\ /usr/bin/perl application/x-perl -0 string eval\ "exec\ /usr/bin/perl application/x-perl -0 string #!/usr/local/bin/perl application/x-perl -0 string #!\ /usr/local/bin/perl application/x-perl -0 string eval\ "exec\ /usr/local/bin/perl application/x-perl -0 string #!/bin/env\ perl application/x-perl -0 string #!\ /bin/env\ perl application/x-perl -0 string #!/usr/bin/env\ perl application/x-perl -0 string #!\ /usr/bin/env\ perl application/x-perl - -# python. -# -0 string #!/bin/python application/x-python -0 string #!\ /bin/python application/x-python -0 string eval\ "exec\ /bin/python application/x-python -0 string #!/usr/bin/python application/x-python -0 string #!\ /usr/bin/python application/x-python -0 string eval\ "exec\ /usr/bin/python application/x-python -0 string #!/usr/local/bin/python application/x-python -0 string #!\ /usr/local/bin/python application/x-python -0 string eval\ "exec\ /usr/local/bin/python application/x-python -0 string #!/bin/env\ python application/x-python -0 string #!\ /bin/env\ python application/x-python -0 string #!/usr/bin/env\ python application/x-python -0 string #!\ /usr/bin/env\ python application/x-python - -# MAGIC as specified in Python/import.c (1.5 to 2.3.0a) -# 20121 ( YEAR - 1995 ) + MONTH + DAY (little endian followed by "\r\n" -# python 1.5/1.6 byte-compiled -0 belong 0x994e0d0a application/x-python-bytecode -# python 2.0 byte-compiled -0 belong 0x87c60d0a application/x-python-bytecode -# python 2.1 byte-compiled -0 belong 0x2aeb0d0a application/x-python-bytecode -# python 2.2 byte-compiled -0 belong 0x2ded0d0a application/x-python-bytecode -# python 2.3 byte-compiled -0 belong 0x3bf20d0a application/x-python-bytecode - -# ruby -0 string #!/bin/env\ ruby application/x-ruby -0 string #!\ /bin/env\ ruby application/x-ruby -0 string #!/usr/bin/env\ ruby application/x-ruby -0 string #!\ /usr/bin/env\ ruby application/x-ruby - -#------------------------------------------------------------------------------ -# compress: file(1) magic for pure-compression formats (no archives) -# -# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, whap, etc. -# -# Formats for various forms of compressed data -# Formats for "compress" proper have been moved into "compress.c", -# because it tries to uncompress it to figure out what's inside. - -# standard unix compress -0 string \037\235 application/x-compress - -# gzip (GNU zip, not to be confused with [Info-ZIP/PKWARE] zip archiver) -0 string \037\213 application/x-gzip - -# KOffice documents (gzipped, with an idenfication string in the 'orig filename' header) ->10 string KOffice ->>18 string application/x-kchart\004\006 application/x-kchart ->>18 string application/x-kformula\004\006 application/x-kformula ->>18 string application/x-killustrator\004\006 application/x-killustrator ->>18 string application/x-kontour\004\006 application/x-kontour ->>18 string application/x-kpresenter\004\006 application/x-kpresenter ->>18 string application/x-kspread\004\006 application/x-kspread ->>18 string application/x-kword\004\006 application/x-kword ->>18 string application/x-krita\004\006 application/x-krita ->>18 string application/x-kivio\004\006 application/x-kivio ->>18 string application/x-karbon\004\006 application/x-karbon - -# Rosegarden documents (like old KOffice documents, gzipped with id string in header) ->10 string audio/x-rosegarden\000 audio/x-rosegarden ->10 string audio/x-rosegarden-device\000 audio/x-rosegarden-device - -#KOffice documents v1.2 and later (may 1 2002) using zip as a wrapper -0 string PK\003\004 application/x-zip ->30 string mimetype ->>38 string application/x-kchart application/x-kchart ->>38 string application/x-kformula application/x-kformula ->>38 string application/x-kontour application/x-kontour ->>38 string application/x-kpresenter application/x-kpresenter ->>38 string application/x-kspread application/x-kspread ->>38 string application/x-krita application/x-krita ->>38 string application/x-kword application/x-kword ->>38 string application/x-kivio application/x-kivio ->>38 string application/x-karbon application/x-karbon - -#KOffice documents writen using the kzip rewrite used 'unx' based -#zips; dislocating the mimetype. This was (temporarily) for koffice 1.3 (okt 2003). -0 string PK\003\004 ->30 string mimetype ->>55 string application/x-kchart application/x-kchart ->>55 string application/x-kformula application/x-kformula ->>55 string application/x-kontour application/x-kontour ->>55 string application/x-kpresenter application/x-kpresenter ->>55 string application/x-kspread application/x-kspread ->>55 string application/x-krita application/x-krita ->>55 string application/x-kword application/x-kword ->>55 string application/x-kivio application/x-kivio ->>55 string application/x-karbon application/x-karbon - -# OpenOffice.org 1.1 puts the mimetype into the header too -0 string PK\003\004 ->30 string mimetype ->>38 string application/vnd.sun.xml.calc application/vnd.sun.xml.calc ->>38 string application/vnd.sun.xml.calc.template application/vnd.sun.xml.calc.template ->>38 string application/vnd.sun.xml.draw application/vnd.sun.xml.draw ->>38 string application/vnd.sun.xml.draw.template application/vnd.sun.xml.draw.template ->>38 string application/vnd.sun.xml.impress application/vnd.sun.xml.impress ->>38 string application/vnd.sun.xml.impress.template application/vnd.sun.xml.impress.template ->>38 string application/vnd.sun.xml.writer application/vnd.sun.xml.writer ->>38 string application/vnd.sun.xml.writer.master application/vnd.sun.xml.writer.master ->>38 string application/vnd.sun.xml.writer.template application/vnd.sun.xml.writer.template ->>38 string application/vnd.sun.xml.base application/vnd.sun.xml.base - -# OASIS OpenDocument (KOffice >= 1.4 and OpenOffice >= 2.0) -0 string PK\003\004 ->30 string mimetype ->>38 string application/vnd.oasis.opendocument.chart application/vnd.oasis.opendocument.chart ->>38 string application/vnd.oasis.opendocument.formula application/vnd.oasis.opendocument.formula ->>38 string application/vnd.oasis.opendocument.graphics application/vnd.oasis.opendocument.graphics ->>38 string application/vnd.oasis.opendocument.graphics-template application/vnd.oasis.opendocument.graphics-template ->>38 string application/vnd.oasis.opendocument.image application/vnd.oasis.opendocument.image ->>38 string application/vnd.oasis.opendocument.presentation application/vnd.oasis.opendocument.presentation ->>38 string application/vnd.oasis.opendocument.presentation-template application/vnd.oasis.opendocument.presentation-template ->>38 string application/vnd.oasis.opendocument.spreadsheet application/vnd.oasis.opendocument.spreadsheet ->>38 string application/vnd.oasis.opendocument.spreadsheet-template application/vnd.oasis.opendocument.spreadsheet-template ->>38 string application/vnd.oasis.opendocument.text application/vnd.oasis.opendocument.text ->>38 string application/vnd.oasis.opendocument.text-template application/vnd.oasis.opendocument.text-template - -# BZIP2 -0 string BZh application/x-bzip2 - -# BZIP -0 string BZ application/x-bzip - -# According to gzip.h, this is the correct byte order for packed data. -0 string \037\036 application/octet-stream -# -# This magic number is byte-order-independent. -# -0 short 017437 application/octet-stream - - -# ID Software's pak data archive -0 string PACK application/x-pak - -# XXX - why *two* entries for "compacted data", one of which is -# byte-order independent, and one of which is byte-order dependent? -# -# compacted data -0 short 0x1fff application/octet-stream -0 string \377\037 application/octet-stream -# huf output -0 short 0145405 application/octet-stream - -# Squeeze and Crunch... -# These numbers were gleaned from the Unix versions of the programs to -# handle these formats. Note that I can only uncrunch, not crunch, and -# I didn't have a crunched file handy, so the crunch number is untested. -# Keith Waclena <keith@cerberus.uchicago.edu> -#0 leshort 0x76FF squeezed data (CP/M, DOS) -#0 leshort 0x76FE crunched data (CP/M, DOS) - -# Freeze -#0 string \037\237 Frozen file 2.1 -#0 string \037\236 Frozen file 1.0 (or gzip 0.5) - -# lzh? -#0 string \037\240 LZH compressed data - -#POSIX tar archive -257 string ustar\0 application/x-tar -#GNU tar archive -257 string ustar\040\040\0 application/x-tar - -# The SVR4 "cpio(4)" hints that there are additional formats, but they -# are defined as "short"s; I think all the new formats are -# character-header formats and thus are strings, not numbers. -0 short 070707 application/x-cpio -0 short 0143561 application/x-cpio -0 string 070707 application/x-cpio -0 string 070701 application/x-cpio -0 string 070702 application/x-cpio - -0 string !<arch>\ndebian application/x-debian-package -0 string =<ar> application/x-archive -0 string !<arch> application/x-archive - -#------------------------------------------------------------------------------ -# -# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt@redhat.com) -# -0 beshort 0xedab ->2 beshort 0xeedb application/x-rpm - -# lzw -0 lelong&0x8080ffff 0x0000081a application/x-arc -# squashed -0 lelong&0x8080ffff 0x0000091a application/x-arc -# uncompressed -0 lelong&0x8080ffff 0x0000021a application/x-arc -# packed -0 lelong&0x8080ffff 0x0000031a application/x-arc -# squeezed -0 lelong&0x8080ffff 0x0000041a application/x-arc -# crunched -0 lelong&0x8080ffff 0x0000061a application/x-arc - -# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu) -2 string -lh0- application/x-lha -2 string -lh1- application/x-lha -2 string -lz4- application/x-lha -2 string -lz5- application/x-lha -# [never seen any but the last; -lh4- reported in comp.compression:] -2 string -lzs- application/x-lha -2 string -lh\40- application/x-lha -2 string -lhd- application/x-lha -2 string -lh2- application/x-lha -2 string -lh3- application/x-lha -2 string -lh4- application/x-lha -2 string -lh5- application/x-lha -2 string -lh6- application/x-lha -2 string -lh7- application/x-lha - -# ARJ archiver (jason@jarthur.Claremont.EDU) -0 leshort 0xea60 application/x-arj - -# RAR archiver (Greg Roelofs, newt@uchicago.edu) -0 string Rar! application/x-rar - -# ZIP archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) -0 string PK\003\004 application/x-zip -# Alternate ZIP string (amc@arwen.cs.berkeley.edu) -0 string PK00PK\003\004 application/x-zip - -# Zoo archiver -20 lelong 0xfdc4a7dc application/x-zoo - -# Shell archives -10 string #\ This\ is\ a\ shell\ archive application/x-shellscript - -# ACE archive -7 string **ACE** application/x-ace - -# XZ -0 string \0fd\037\07a\058\05a\000 application/x-xz - - -#------------------------------------------------------------------------------ -# frame: file(1) magic for FrameMaker files -# -# This stuff came on a FrameMaker demo tape, most of which is -# copyright, but this file is "published" as witness the following: -# -0 string \<MakerFile application/x-frame -0 string \<MIFFile application/x-frame -0 string \<MakerDictionary application/x-frame -0 string \<MakerScreenFon application/x-frame -0 string \<MML application/x-frame -0 string \<Book application/x-frame -0 string \<Maker application/x-frame - -#------------------------------------------------------------------------------ -# html: file(1) magic for HTML (HyperText Markup Language) docs -# -# from Daniel Quinlan <quinlan@yggdrasil.com> -# -0 string \<HEAD text/html -0 string \<head text/html -0 string \<BODY text/html -0 string \<body text/html -0 string \<TITLE text/html -0 string \<title text/html -0 string \<html text/html -0 string \<HTML text/html -0 string \<!-- text/html -0 string \<h1 text/html -0 string \<H1 text/html -0 string \<!doctype\ HTML text/html -0 string \<!DOCTYPE\ HTML text/html -0 string \<!doctype\ html text/html -0 string \<!DOCTYPE\ html text/html - -# PHP (offset should be "between 0 and 64"...) -0 string \<?php application/x-php - -# Docbook -0 string \<!doctype\ book\ public\ "-//OASIS//DTD\ DocBook text/docbook -# Hack: <?xml (with version but no encoding etc.) with a docbook mimetype afterwards. -0 string \<?xml ->23 string \<!doctype\ book\ public\ "-//OASIS//DTD\ DocBook text/docbook ->23 string \<!DOCTYPE\ book\ PUBLIC\ "-//OASIS//DTD\ DocBook text/docbook ->23 string \<!doctype\ book\ public\ "-//KDE//DTD\ DocBook text/docbook ->23 string \<!DOCTYPE\ book\ PUBLIC\ "-//KDE//DTD\ DocBook text/docbook - -# Extensible markup language (XML), a subset of SGML -# from Marc Prud'hommeaux (marc@apocalypse.org) -0 string \<?xml text/xml -0 string \<?XML text/xml -0 string \<?Xml text/xml - - -#----------------------------------------------------------------------------- -# troff stuff -# -0 string .\\" application/x-troff -0 string '\\" application/x-troff -0 string '.\\" application/x-troff -0 string \\" application/x-troff - -#------------------------------------------------------------------------------ -# images: file(1) magic for image formats (see also "c-lang" for XPM bitmaps) -# -# originally from jef@helios.ee.lbl.gov (Jef Poskanzer), -# additions by janl@ifi.uio.no as well as others. Jan also suggested -# merging several one- and two-line files into here. -# -# XXX - byte order for GIF and TIFF fields? -# [GRR: TIFF allows both byte orders; GIF is probably little-endian] -# - -# [GRR: what the hell is this doing in here?] -#0 string xbtoa btoa'd file - -# PBMPLUS -# PBM file -0 string P1 image/x-portable-bitmap -# PGM file -0 string P2 image/x-portable-greymap -# PPM file -0 string P3 image/x-portable-pixmap -# PBM "rawbits" file -0 string P4 image/x-portable-bitmap -# PGM "rawbits" file -0 string P5 image/x-portable-greymap -# PPM "rawbits" file -0 string P6 image/x-portable-pixmap - -# NIFF (Navy Interchange File Format, a modification of TIFF) -# [GRR: this *must* go before TIFF] -0 string IIN1 image/x-niff - -0 string II\x2a\x00 ->8 string CR\x02 image/x-raw - -# Phase One RAW image, big-endian -32 string MMMMRawT image/x-raw -# Phase One RAW image, little-endian -32 string IIIITwaR image/x-raw -# Canon RAW image -6 string HEAPCCDR image/x-raw -# Canon CR2 image (20D, 1Dmk2, ...) -0 string II*\000\020\000\000\000CR image/x-raw -# Minolta RAW image -0 string \x00MRM image/x-raw -# Fuji RAW image -0 string FUJIFILM image/x-raw -# Rollei RAW image -0 string DSC-Image image/x-raw -# Foveon RAW image -0 string FOVb image/x-raw - -# TIFF and friends -# TIFF file, big-endian -0 string MM\x00\x2a image/tiff -# TIFF file, little-endian -0 string II\x2a\x00 image/tiff - -# GIF -0 string GIF image/gif - -# JPEG images -0 beshort 0xffd8 image/jpeg - -# JPEG2000 images -0 beshort 0x0101010C6A50 image/jp2 - -# PNG images -0 string \x89PNG image/png - -# PC bitmaps (OS/2, Windoze BMP files) (Greg Roelofs, newt@uchicago.edu) -0 string BM -#(OS/2 1.x format) ->14 byte 12 image/x-bmp -#(OS/2 2.x format) ->14 byte 64 image/x-bmp -# (Windows 3.x format) ->14 byte 40 image/x-bmp - -# PCX images (Nadeem Hasan) -0 byte 10 -# Version 2.5 ->1 byte 0 image/x-pcx -# Version 2.8 w/ palette ->1 byte 2 image/x-pcx -# Version 2.8 w/o pallete ->1 byte 3 image/x-pcx -# Version 3.0 ->1 byte 5 image/x-pcx - -#0 string IC icon -#0 string PI pointer -#0 string CI color icon -#0 string CP color pointer -#0 string BA bitmap array - -# Gimp's XCF -0 string gimp\ xcf image/x-xcf-gimp - -# X11 cursor files -0 string Xcur image/x-xcursor - -# EXR images -0 lelong 0x762f3101 image/x-exr - -# SGI images (*.rgb, *.rgba, *.bw, *.sgi) -0 beshort 474 image/x-rgb - -#------------------------------------------------------------------------------ -# lisp: file(1) magic for lisp programs -# -# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com) -#0 string ;; text/plain -# Emacs 18 - this is always correct, but not very magical. -0 string \012( application/x-elc -# Emacs 19 -0 string ;ELC\023\000\000\000 application/x-elc - -#------------------------------------------------------------------------------ -# mail.news: file(1) magic for mail and news -# -# There are tests to ascmagic.c to cope with mail and news. - -0 string Relay-Version: message/rfc822 -0 string #!\ rnews message/rfc822 -0 string N#!\ rnews message/rfc822 -0 string Forward\ to message/rfc822 -0 string Pipe\ to message/rfc822 -0 string Return-Path: message/rfc822 -0 string Return-Path: message/rfc822 -0 string Path: message/news -0 string Xref: message/news -0 string From: message/rfc822 -0 string From\x20 application/mbox -0 string Article message/news -#0 string BABYL message/x-gnu-rmail -0 string Received: message/rfc822 - - - -# TNEF files... -0 lelong 0x223E9F78 application/ms-tnef - - -#------------------------------------------------------------------------------ -# mswrite - -0 lelong 0xBE31 application/x-mswrite -# with OLE objects -0 lelong 0xBE32 application/x-mswrite - -#------------------------------------------------------------------------------ -# msword: file(1) magic for MS Word files -# -# Contributor claims: -# Reversed-engineered MS Word magic numbers -# Except that they are generic MSOffice magic numbers ! (DF) - -0 string \376\067\0\043 application/msword -0 string \320\317\021\340\241\261 application/msword -0 string \333\245-\0\0\0 application/msword -2080 string Microsoft\ Word\ 6.0\ Document application/msword -2112 string Microsoft\ Word\ document\ data application/msword - -# excel -2080 string Microsoft\ Excel\ 5.0\ Worksheet application/msexcel - -#------------------------------------------------------------------------------ -# word perfect - -0 belong 0xff575053c405 application/wordperfect -1 string WPC application/wordperfect - -#------------------------------------------------------------------------------ -# printer: file(1) magic for printer-formatted files -# - -# PostScript -0 string %! application/postscript ->15 string EPS image/x-eps -0 string \004%! application/postscript ->16 string EPS image/x-eps - -# Acrobat -0 string %PDF- application/pdf -0 string \n%PDF- application/pdf - -#------------------------------------------------------------------------------ -# sc: file(1) magic for "sc" spreadsheet -# -38 string Spreadsheet application/x-sc - -#------------------------------------------------------------------------------ -# tex: file(1) magic for TeX files -# -# XXX - needs byte-endian stuff (big-endian and little-endian DVI?) -# -# From <conklin@talisman.kaleida.com> - -# Although we may know the offset of certain text fields in TeX DVI -# and font files, we can't use them reliably because they are not -# zero terminated. [but we do anyway, christos] -0 string \367\002 application/x-dvi -#0 string \367\203 TeX generic font data -#0 string \367\131 TeX packed font data -#0 string \367\312 TeX virtual font data -# Maybe we should have a mimetype like x-tex-log, but in any case -# text/plain is better than nothing. (David Faure) -0 string This\ is\ TeX, text/plain -0 string This\ is\ METAFONT, text/plain - -# XXX promoted from tex so that *.tfm is not mis-identified as mc68k file. -# There is no way to detect TeX Font Metric (*.tfm) files without -# breaking them apart and reading the data. The following patterns -# match most *.tfm files generated by METAFONT or afm2tfm. -2 string \000\021 application/x-tex-tfm ->33 string >\0 application/x-tex-tfm -2 string \000\022 application/x-tex-tfm ->33 string >\0 application/x-tex-tfm - -# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com) -#0 string \\input\ texinfo Texinfo source text -#0 string This\ is\ Info\ file GNU Info text - -# correct TeX magic for Linux (and maybe more) -# from Peter Tobias (tobias@server.et-inf.fho-emden.de) -# -0 leshort 0x02f7 application/x-dvi - -# RTF - Rich Text Format -0 string {\\rtf text/rtf - -# UTF16 (UTF16 docs are not MP3s - see the next audio/x-mp3 check :)) -0 beshort 0xfffe text/plain - -#------------------------------------------------------------------------------ -# animation: file(1) magic for animation/movie formats -# -# animation formats -# MPEG, FLI, DL originally from vax@ccwf.cc.utexas.edu (VaX#n8) -# FLC, SGI, Apple originally from Daniel Quinlan (quinlan@yggdrasil.com) - -# MPEG animation format -0 belong 0x000001b3 video/mpeg -0 belong 0x000001ba video/mpeg - -# MPEG 1.0 audio (layer III,II,I) -0 beshort&0xfff8 0xfff8 ->0 beshort&0x0006 0x0002 audio/x-mp3 ->0 beshort&0x0006 0x0004 audio/x-mp2 ->0 beshort&0x0006 0x0006 audio/mpeg - -# MPEG 2.0 audio (layer III,II,I) -0 beshort&0xfff8 0xfff0 ->0 beshort&0x0006 0x0002 audio/x-mp3 ->0 beshort&0x0006 0x0004 audio/x-mp2 ->0 beshort&0x0006 0x0006 audio/mpeg - -# MPEG 2.5 audio (layer III,II,I) -0 beshort&0xfff8 0xff80 ->0 beshort&0x0006 0x0002 audio/x-mp3 ->0 beshort&0x0006 0x0004 audio/x-mp2 ->0 beshort&0x0006 0x0006 audio/mpeg - -# MPEG-4 audio -16 string M4A audio/mp4 - - -# FLI animation format -0 leshort 0xAF11 video/x-flic -# FLC animation format -0 leshort 0xAF12 video/x-flic - -# SGI and Apple formats -0 string MOVI video/sgi -4 string moov video/quicktime -4 string mdat video/quicktime -4 string wide video/quicktime -4 string free video/quicktime - -# DIF digital video file format <mpruett@sgi.com> -#0 belong&0xffffff00 0x1f070000 DIF - -# Microsoft Advanced Streaming Format (ASF) <mpruett@sgi.com> -0 belong 0x3026b275 video/x-ms-asf - -# MNG Video Format, <URL:http://www.libpng.org/pub/mng/spec/> -0 string \x8aMNG video/x-mng - -# JNG Video Format, <URL:http://www.libpng.org/pub/mng/spec/> -#0 string \x8bJNG JNG video data, - -# Vivo video (Wolfram Kleff) -#3 string \x0D\x0AVersion:Vivo Vivo video data - -# VRML (Virtual Reality Modelling Language) -#0 string/b #VRML\ V1.0\ ascii VRML 1 file -#0 string/b #VRML\ V2.0\ utf8 ISO/IEC 14772 VRML 97 file - -#------------------------------------------------------------------------------ -# Databases -# -# GDBM magic numbers -# Will be maintained as part of the GDBM distribution in the future. -# <downsj@teeny.org> -0 belong 0x13579ace application/x-gdbm -0 lelong 0x13579ace application/x-gdbm -0 string GDBM application/x-gdbm -# -0 belong 0x061561 application/x-dbm -# -# Executables -# -0 string \177ELF ->4 byte 0 ->4 byte 1 ->4 byte 2 ->5 byte 0 ->5 byte 1 ->>16 leshort 0 ->>16 leshort 1 application/x-object ->>16 leshort 2 application/x-executable ->>16 leshort 3 application/x-sharedlib ->>16 leshort 4 application/x-core ->5 byte 2 ->>16 beshort 0 ->>16 beshort 1 application/x-object ->>16 beshort 2 application/x-executable ->>16 beshort 3 application/x-sharedlib ->>16 beshort 4 application/x-core - -# MS Access database (95 or newer, i.e. MS Jet 3.0 or newer) -4 string Standard\ Jet\ DB application/x-msaccess - -# -# DOS -0 string MZ application/x-msdos-program -# -# KDE desktop file -0 string [Desktop\ Entry] application/x-desktop -0 string [Desktop\ Action application/x-desktop -0 string [KDE\ Desktop\ Entry] application/x-desktop -0 string \#\ Config\ File application/x-desktop -0 string \#\ KDE\ Config\ File application/x-desktop -# xmcd database file for kscd -0 string \#\ xmcd text/xmcd -# SQLite database files -0 string **\ This\ file\ contains\ an\ SQLite application/x-sqlite2 -0 string SQLite\ format\ 3 application/x-sqlite3 - -#------------------------------------------------------------------------------ -# Java - -0 short 0xcafe ->2 short 0xbabe application/x-java - -# vcard / vcalendar -0 string BEGIN:VCALENDAR text/x-vcalendar -0 string begin:vcalendar text/x-vcalendar -0 string BEGIN:VCARD text/x-vcard -0 string begin:vcard text/x-vcard - -# LDIF / LDAP interchange format -0 string dn:\ cn= text/x-ldif - -# applix -#------------------------------------------------------------------------------ -# applix: file(1) magic for Applixware -# From: Peter Soos <sp@osb.hu> -# -0 string *BEGIN ->7 string WORDS application/x-applixword ->7 string GRAPHICS application/x-applixgraphics -#>7 string RASTER application/x-applix ->7 string SPREADSHEETS application/x-applixspread -#>7 string MACRO application/x-applix -#>7 string BUILDER application/x-applix - -#------------------------------------------------------------------------------ -# diff: file(1) magic for diff(1) output -# -0 string diff\ text/x-diff -0 string ***\ text/x-diff -0 string Only\ in\ text/x-diff -0 string Common\ subdirectories:\ text/x-diff - - -#------------------------------------------------------------------------------ -# flash: file(1) magic for Macromedia Flash file format -# -# See -# -# http://www.macromedia.com/software/flash/open/ -# -0 string FWS application/x-shockwave-flash - -#------------------------------------------------------------------------------ -# DjVu (Leon Bottou <leonb@research.att.com>): -# -4 string FORM ->12 string DJVU image/x-djvu ->12 string DJVM image/x-djvu ->12 string BM44 image/x-djvu ->12 string PM44 image/x-djvu - -#------------------------------------------------------------------------------ -# adi: file(1) magic for ADi's objects -# From Gregory McGarry <g.mcgarry@ieee.org> -# -0 leshort 0x521c application/x-executable # COFF DSP21k ->18 lelong &02 application/x-executable # executable, ->18 lelong ^02 ->>18 lelong &01 application/x-executable # static object, ->>18 lelong ^01 application/x-executable # relocatable object, ->18 lelong &010 application/x-executable # stripped ->18 lelong ^010 application/x-executable # not stripped - -#------------------------------------------------------------------------------ -# alliant: file(1) magic for Alliant FX series a.out files -# -# If the FX series is the one that had a processor with a 68K-derived -# instruction set, the "short" should probably become "beshort" and the -# "long" should probably become "belong". -# If it's the i860-based one, they should probably become either the -# big-endian or little-endian versions, depending on the mode they ran -# the 860 in.... -# -0 short 0420 application/x-executable # 0420 Alliant virtual executable ->2 short &0x0020 application/x-sharedlib # common library ->16 long >0 application/x-sharedlib # not stripped -0 short 0421 application/x-executable # 0421 Alliant compact executable ->2 short &0x0020 application/x-sharedlib # common library ->16 long >0 application/x-sharedlib # not stripped -#----------------------------------------------------------- - -# alpha architecture description -# - -0 leshort 0603 application/x-executable # COFF format alpha ->22 leshort&030000 !020000 application/x-executable # executable ->24 leshort 0410 application/x-executable # pure ->24 leshort 0413 application/x-executable # paged ->22 leshort&020000 !0 application/x-executable # dynamically linked ->16 lelong !0 application/x-executable # not stripped ->16 lelong 0 application/x-executable # stripped ->22 leshort&030000 020000 application/x-sharedlib # shared library ->24 leshort 0407 application/x-executable # object - -# Basic recognition of Digital UNIX core dumps - Mike Bremford <mike@opac.bl.uk> -# -# The actual magic number is just "Core", followed by a 2-byte version -# number; however, treating any file that begins with "Core" as a Digital -# UNIX core dump file may produce too many false hits, so we include one -# byte of the version number as well; DU 5.0 appears only to be up to -# version 2. -# -0 string Core\001 application/x-core # Alpha COFF format core dump (Digital UNIX) -0 string Core\002 application/x-core # Alpha COFF format core dump (Digital UNIX) - -#------------------------------------------------------------------------------ -# Win95 InternetShortcut (URL): (Helge Deller <deller@gmx.de>): -# -1 string InternetShortcut application/x-mswinurl - -#------------------------------------------------------------------------------ -# amigaos: file(1) magic for AmigaOS binary formats: - -# -# From ignatios@cs.uni-bonn.de (Ignatios Souvatzis) -# Some formats are still missing: AmigaOS special IFF's, e.g.: FORM....CTLG -# (the others should be separate, anyway) -# -0 belong 0x000003f3 application/x-executable # AmigaOS loadseg()ble executable/binary -0 belong 0x000003e7 application/x-sharedlib # AmigaOS object/library data - -0 string %TGIF application/x-tgif - -0 string #FIG application/x-xfig -0 string #LyX\ 1 application/x-lyx - -#------------------------------------------------------------------------------ -# VRML -0 string #VRML model/vrml - -#------------------------------------------------------------------------------ -# TDEWallet file -0 string KWALLET\012\015\000\015\012 application/x-tde-wallet - -#------------------------------------------------------------------------------ -# ICA Client configuration files -0 string [WFClient] application/x-ica -1 string [WFClient] application/x-ica -0 string [ApplicationServers] application/x-ica -1 string [ApplicationServers] application/x-ica -0 string [ICA application/x-ica -1 string [ICA application/x-ica -0 string [Program\ Neighborhood application/x-ica -1 string [Program\ Neighborhood application/x-ica - -#------------------------------------------------------------------------------ -# CD image files (ISO is imported from file 4.07) -32769 string CD001 application/x-iso -32633 string CD001 application/x-iso - -# CDR-wins bin-with-cue files -#0 belong 0x00FFFFFF application/x-cuebin - -# FITS (see RFC 4047) -# The SIMPLE keyword is always on the first line, NAXIS on the third. -# Lines are supposed to be exactly 80 characters long. -# FITS files can be different but then they are not application/fits anymore. -0 string SIMPLE\ \ =\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ T application/fits ->160 string NAXIS\ \ \ =\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 2 image/fits ->160 string NAXIS\ \ \ =\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 3 image/fits - -# kate: space-indent off; replace-tabs off; diff --git a/tdeio/tdeio/CMakeLists.txt b/tdeio/tdeio/CMakeLists.txt index 5f04568c5..2b90c0107 100644 --- a/tdeio/tdeio/CMakeLists.txt +++ b/tdeio/tdeio/CMakeLists.txt @@ -117,7 +117,7 @@ set( ${target}_SRCS tde_add_library( ${target} STATIC_PIC AUTOMOC SOURCES ${${target}_SRCS} - LINK ${GAMIN_LIBRARIES} + LINK magic ${GAMIN_LIBRARIES} ) diff --git a/tdeio/tdeio/kmimemagic.cpp b/tdeio/tdeio/kmimemagic.cpp index 6aae4e39d..ea0a30498 100644 --- a/tdeio/tdeio/kmimemagic.cpp +++ b/tdeio/tdeio/kmimemagic.cpp @@ -1,4 +1,7 @@ -/* This file is part of the KDE libraries +/* This file is part of the TDE libraries + Copyright (C) 2014 Timothy Pearson <kb9vqf@pearsoncomputing.net> + + Small portions (the original KDE interface and utime code) are: Copyright (C) 2000 Fritz Elfert <fritz@kde.org> Copyright (C) 2004 Allan Sandfeld Jensen <kde@carewolf.com> @@ -26,29 +29,23 @@ #include <klargefile.h> #include <assert.h> -static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb); -static void process(struct config_rec* conf, const TQString &); -static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes); -static int tagmagic(unsigned char *buf, int nbytes); -static int textmagic(struct config_rec* conf, unsigned char *, int); +#include <magic.h> -static void tryit(struct config_rec* conf, unsigned char *buf, int nb); -static int match(struct config_rec* conf, unsigned char *, int); +static void process(struct config_rec* conf, const TQString &); KMimeMagic* KMimeMagic::s_pSelf; static KStaticDeleter<KMimeMagic> kmimemagicsd; -KMimeMagic* KMimeMagic::self() -{ - if( !s_pSelf ) - initStatic(); - return s_pSelf; +KMimeMagic* KMimeMagic::self() { + if( !s_pSelf ) { + initStatic(); + } + return s_pSelf; } -void KMimeMagic::initStatic() -{ - s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() ); - s_pSelf->setFollowLinks( true ); +void KMimeMagic::initStatic() { + s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() ); + s_pSelf->setFollowLinks( true ); } #include <stdio.h> @@ -66,449 +63,16 @@ void KMimeMagic::initStatic() #include <tqregexp.h> #include <tqstring.h> -//#define MIME_MAGIC_DEBUG_TABLE // untested - -// Uncomment to debug the config-file parsing phase -//#define DEBUG_APPRENTICE -// Uncomment to debug the matching phase -//#define DEBUG_MIMEMAGIC - -#if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE) -#define DEBUG_LINENUMBERS -#endif +#define HOWMANY 4000 /* big enough to recognize most WWW files, and skip GPL-headers */ -/* - * Buitltin Mime types - */ -#define MIME_BINARY_UNKNOWN "application/octet-stream" -#define MIME_BINARY_UNREADABLE "application/x-unreadable" -#define MIME_BINARY_ZEROSIZE "application/x-zerosize" -#define MIME_TEXT_UNKNOWN "text/plain" -#define MIME_TEXT_PLAIN "text/plain" #define MIME_INODE_DIR "inode/directory" #define MIME_INODE_CDEV "inode/chardevice" #define MIME_INODE_BDEV "inode/blockdevice" #define MIME_INODE_FIFO "inode/fifo" #define MIME_INODE_LINK "inode/link" #define MIME_INODE_SOCK "inode/socket" -// Following should go in magic-file - Fritz -#define MIME_APPL_TROFF "application/x-troff" -#define MIME_APPL_TAR "application/x-tar" -#define MIME_TEXT_FORTRAN "text/x-fortran" - -#define MAXMIMESTRING 256 - -#define HOWMANY 4000 /* big enough to recognize most WWW files, and skip GPL-headers */ -#define MAXDESC 50 /* max leng of text description */ -#define MAXstring 64 /* max leng of "string" types */ - -typedef union VALUETYPE { - unsigned char b; - unsigned short h; - unsigned long l; - char s[MAXstring]; - unsigned char hs[2]; /* 2 bytes of a fixed-endian "short" */ - unsigned char hl[4]; /* 2 bytes of a fixed-endian "long" */ -} VALUETYPE; - -struct magic { - struct magic *next; /* link to next entry */ -#ifdef DEBUG_LINENUMBERS - int lineno; /* line number from magic file - doesn't say from which one ;) */ -#endif - - short flag; -#define INDIR 1 /* if '>(...)' appears, */ -#define UNSIGNED 2 /* comparison is unsigned */ - short cont_level; /* level of ">" */ - struct { - char type; /* byte short long */ - long offset; /* offset from indirection */ - } in; - long offset; /* offset to magic number */ - unsigned char reln; /* relation (0=eq, '>'=gt, etc) */ - char type; /* int, short, long or string. */ - char vallen; /* length of string value, if any */ -#define BYTE 1 -#define SHORT 2 -#define LONG 4 -#define STRING 5 -#define DATE 6 -#define BESHORT 7 -#define BELONG 8 -#define BEDATE 9 -#define LESHORT 10 -#define LELONG 11 -#define LEDATE 12 - VALUETYPE value; /* either number or string */ - unsigned long mask; /* mask before comparison with value */ - char nospflag; /* suppress space character */ - - /* NOTE: this string is suspected of overrunning - find it! */ - char desc[MAXDESC]; /* description */ -}; - -/* - * data structures for tar file recognition - * -------------------------------------------------------------------------- - * Header file for public domain tar (tape archive) program. - * - * @(#)tar.h 1.20 86/10/29 Public Domain. Created 25 August 1985 by John - * Gilmore, ihnp4!hoptoad!gnu. - * - * Header block on tape. - * - * I'm going to use traditional DP naming conventions here. A "block" is a big - * chunk of stuff that we do I/O on. A "record" is a piece of info that we - * care about. Typically many "record"s fit into a "block". - */ -#define RECORDSIZE 512 -#define NAMSIZ 100 -#define TUNMLEN 32 -#define TGNMLEN 32 - -union record { - char charptr[RECORDSIZE]; - struct header { - char name[NAMSIZ]; - char mode[8]; - char uid[8]; - char gid[8]; - char size[12]; - char mtime[12]; - char chksum[8]; - char linkflag; - char linkname[NAMSIZ]; - char magic[8]; - char uname[TUNMLEN]; - char gname[TGNMLEN]; - char devmajor[8]; - char devminor[8]; - } header; -}; - -/* The magic field is filled with this if uname and gname are valid. */ -#define TMAGIC "ustar " /* 7 chars and a null */ - -/* - * file-function prototypes - */ -static int is_tar(unsigned char *, int); -static unsigned long signextend(struct magic *, unsigned long); -static int getvalue(struct magic *, char **); -static int hextoint(int); -static char *getstr(char *, char *, int, int *); -static int mget(union VALUETYPE *, unsigned char *, struct magic *, int); -static int mcheck(union VALUETYPE *, struct magic *); -static int mconvert(union VALUETYPE *, struct magic *); -static long from_oct(int, char *); - -/* - * includes for ASCII substring recognition formerly "names.h" in file - * command - * - * Original notes: names and types used by ascmagic in file(1). - * These tokens are - * here because they can appear anywhere in the first HOWMANY bytes, while - * tokens in /etc/magic must appear at fixed offsets into the file. Don't - * make HOWMANY too high unless you have a very fast CPU. - */ - -/* these types are used calculate index to 'types': keep em in sync! */ -/* HTML inserted in first because this is a web server module now */ -/* ENG removed because stupid */ -#define L_HTML 0x001 /* HTML */ -#define L_C 0x002 /* first and foremost on UNIX */ -#define L_MAKE 0x004 /* Makefiles */ -#define L_PLI 0x008 /* PL/1 */ -#define L_MACH 0x010 /* some kinda assembler */ -#define L_PAS 0x020 /* Pascal */ -#define L_JAVA 0x040 /* Java source */ -#define L_CPP 0x080 /* C++ */ -#define L_MAIL 0x100 /* Electronic mail */ -#define L_NEWS 0x200 /* Usenet Netnews */ -#define L_DIFF 0x400 /* Output of diff */ -#define L_OBJC 0x800 /* Objective C */ - -// Note: this is not a type, it's just used to mark items that should count more -#define FLAG_STRONG 0x1000 - -#define P_HTML 0 /* HTML */ -#define P_C 1 /* first and foremost on UNIX */ -#define P_MAKE 2 /* Makefiles */ -#define P_PLI 3 /* PL/1 */ -#define P_MACH 4 /* some kinda assembler */ -#define P_PAS 5 /* Pascal */ -#define P_JAVA 6 /* Java source */ -#define P_CPP 7 /* C++ */ -#define P_MAIL 8 /* Electronic mail */ -#define P_NEWS 9 /* Usenet Netnews */ -#define P_DIFF 10 /* Output of diff */ -#define P_OBJC 11 /* Objective C */ - -typedef struct asc_type { - const char *type; - int kwords; - double weight; -} asc_type; - -static const asc_type types[] = { - { "text/html", 19, 2 }, // 10 items but 10 different words only - { "text/x-c", 13, 1 }, - { "text/x-makefile", 4, 1.9 }, - { "text/x-pli", 1, 3 }, - { "text/x-assembler", 6, 2.1 }, - { "text/x-pascal", 1, 1 }, - { "text/x-java", 12, 1 }, - { "text/x-c++", 19, 1 }, - { "message/rfc822", 4, 1.9 }, - { "message/news", 3, 2 }, - { "text/x-diff", 4, 2 }, - { "text/x-objc", 10, 1 } -}; - -#define NTYPES (sizeof(types)/sizeof(asc_type)) - -static struct names { - const char *name; - short type; -} const names[] = { - { - "<html", L_HTML | FLAG_STRONG - }, - { - "<HTML", L_HTML | FLAG_STRONG - }, - { - "<head", L_HTML - }, - { - "<HEAD", L_HTML - }, - { - "<body", L_HTML - }, - { - "<BODY", L_HTML - }, - { - "<title", L_HTML - }, - { - "<TITLE", L_HTML - }, - { - "<h1", L_HTML - }, - { - "<H1", L_HTML - }, - { - "<a", L_HTML - }, - { - "<A", L_HTML - }, - { - "<img", L_HTML - }, - { - "<IMG", L_HTML - }, - { - "<!--", L_HTML - }, - { - "<!doctype", L_HTML - }, - { - "<!DOCTYPE", L_HTML - }, - { - "<div", L_HTML - }, - { - "<DIV", L_HTML - }, - { - "<frame", L_HTML - }, - { - "<FRAME", L_HTML - }, - { - "<frameset", L_HTML - }, - { - "<FRAMESET", L_HTML - }, - { - "<script", L_HTML | FLAG_STRONG - }, - { - "<SCRIPT", L_HTML | FLAG_STRONG - }, - { - "/*", L_C|L_CPP|L_JAVA|L_OBJC - }, - { - "//", L_C|L_CPP|L_JAVA|L_OBJC - }, - { - "#include", L_C|L_CPP - }, - { - "#ifdef", L_C|L_CPP - }, - { - "#ifndef", L_C|L_CPP - }, - { - "bool", L_C|L_CPP - }, - { - "char", L_C|L_CPP|L_JAVA|L_OBJC - }, - { - "int", L_C|L_CPP|L_JAVA|L_OBJC - }, - { - "float", L_C|L_CPP|L_JAVA|L_OBJC - }, - { - "void", L_C|L_CPP|L_JAVA|L_OBJC - }, - { - "extern", L_C|L_CPP - }, - { - "struct", L_C|L_CPP - }, - { - "union", L_C|L_CPP - }, - { - "implements", L_JAVA - }, - { - "super", L_JAVA - }, - { - "import", L_JAVA - }, - { - "class", L_CPP|L_JAVA - }, - { - "public", L_CPP|L_JAVA - }, - { - "private", L_CPP|L_JAVA - }, - { - "explicit", L_CPP - }, - { - "virtual", L_CPP - }, - { - "namespace", L_CPP - }, - { - "#import", L_OBJC - }, - { - "@interface", L_OBJC - }, - { - "@implementation", L_OBJC - }, - { - "@protocol", L_OBJC - }, - { - "CFLAGS", L_MAKE - }, - { - "LDFLAGS", L_MAKE - }, - { - "all:", L_MAKE - }, - { - ".PHONY:", L_MAKE - }, - { - "srcdir", L_MAKE - }, - { - "exec_prefix", L_MAKE - }, - /* - * Too many files of text have these words in them. Find another way - * to recognize Fortrash. - */ - { - ".ascii", L_MACH - }, - { - ".asciiz", L_MACH - }, - { - ".byte", L_MACH - }, - { - ".even", L_MACH - }, - { - ".globl", L_MACH - }, - { - "clr", L_MACH - }, - { - "(input", L_PAS - }, - { - "dcl", L_PLI - }, - { - "Received:", L_MAIL - }, - /* we now stop at '>' for tokens, so this one won't work { - ">From", L_MAIL - },*/ - { - "Return-Path:", L_MAIL - }, - { - "Cc:", L_MAIL - }, - { - "Newsgroups:", L_NEWS - }, - { - "Path:", L_NEWS - }, - { - "Organization:", L_NEWS - }, - { - "---", L_DIFF - }, - { - "+++", L_DIFF - }, - { - "***", L_DIFF - }, - { - "@@", L_DIFF - }, - { - NULL, 0 - } -}; +#define MIME_BINARY_UNREADABLE "application/x-unreadable" +#define MIME_BINARY_ZEROSIZE "application/x-zerosize" /** * Configuration for the utime() problem. @@ -520,830 +84,84 @@ static struct names { * anywhere else, because that breaks archiving programs, that check the ctime. * Hence this class, to configure the directories where the atime should be restored. */ -class KMimeMagicUtimeConf -{ -public: - KMimeMagicUtimeConf() - { - tmpDirs << TQString::fromLatin1("/tmp"); // default value - - // The trick is that we also don't want the user to override globally set - // directories. So we have to misuse TDEStandardDirs :} - TQStringList confDirs = TDEGlobal::dirs()->resourceDirs( "config" ); - if ( !confDirs.isEmpty() ) - { - TQString globalConf = confDirs.last() + "kmimemagicrc"; - if ( TQFile::exists( globalConf ) ) - { - KSimpleConfig cfg( globalConf ); - cfg.setGroup( "Settings" ); - tmpDirs = cfg.readListEntry( "atimeDirs" ); - } - if ( confDirs.count() > 1 ) - { - TQString localConf = confDirs.first() + "kmimemagicrc"; - if ( TQFile::exists( localConf ) ) - { - KSimpleConfig cfg( localConf ); - cfg.setGroup( "Settings" ); - tmpDirs += cfg.readListEntry( "atimeDirs" ); - } - } - for ( TQStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it ) - { - TQString dir = *it; - if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' ) - (*it) += '/'; - } - } -#if 0 - // debug code - for ( TQStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it ) - kdDebug(7018) << " atimeDir: " << *it << endl; -#endif - } - - bool restoreAccessTime( const TQString & file ) const - { - TQString dir = file.left( file.findRev( '/' ) ); - bool res = tmpDirs.contains( dir ); - //kdDebug(7018) << "restoreAccessTime " << file << " dir=" << dir << " result=" << res << endl; - return res; - } - TQStringList tmpDirs; -}; - -/* current config */ -struct config_rec { - bool followLinks; - TQString resultBuf; - int accuracy; - - struct magic *magic, /* head of magic config list */ - *last; - KMimeMagicUtimeConf * utimeConf; -}; - -#ifdef MIME_MAGIC_DEBUG_TABLE -static void -test_table() -{ - struct magic *m; - struct magic *prevm = NULL; - - kdDebug(7018) << "test_table : started" << endl; - for (m = conf->magic; m; m = m->next) { - if (isprint((((unsigned long) m) >> 24) & 255) && - isprint((((unsigned long) m) >> 16) & 255) && - isprint((((unsigned long) m) >> 8) & 255) && - isprint(((unsigned long) m) & 255)) { - //debug("test_table: POINTER CLOBBERED! " - //"m=\"%c%c%c%c\" line=%d", - (((unsigned long) m) >> 24) & 255, - (((unsigned long) m) >> 16) & 255, - (((unsigned long) m) >> 8) & 255, - ((unsigned long) m) & 255, - prevm ? prevm->lineno : -1); - break; - } - prevm = m; - } -} -#endif - -#define EATAB {while (isascii((unsigned char) *l) && \ - isspace((unsigned char) *l)) ++l;} - -int KMimeMagic::parse_line(char *line, int *rule, int lineno) -{ - int ws_offset; - - /* delete newline */ - if (line[0]) { - line[strlen(line) - 1] = '\0'; - } - /* skip leading whitespace */ - ws_offset = 0; - while (line[ws_offset] && isspace(line[ws_offset])) { - ws_offset++; - } - - /* skip blank lines */ - if (line[ws_offset] == 0) { - return 0; - } - /* comment, do not parse */ - if (line[ws_offset] == '#') - return 0; - - /* if we get here, we're going to use it so count it */ - (*rule)++; - - /* parse it */ - return (parse(line + ws_offset, lineno) != 0); -} - -/* - * apprentice - load configuration from the magic file. - */ -int KMimeMagic::apprentice( const TQString& magicfile ) -{ - FILE *f; - char line[BUFSIZ + 1]; - int errs = 0; - int lineno; - int rule = 0; - TQCString fname; - - if (magicfile.isEmpty()) - return -1; - fname = TQFile::encodeName(magicfile); - f = fopen(fname, "r"); - if (f == NULL) { - kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl; - return -1; - } - - /* parse it */ - for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++) - if (parse_line(line, &rule, lineno)) - errs++; - - fclose(f); - -#ifdef DEBUG_APPRENTICE - kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl; - kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl; -#endif - -#ifdef MIME_MAGIC_DEBUG_TABLE - test_table(); -#endif - - return (errs ? -1 : 0); -} - -int KMimeMagic::buff_apprentice(char *buff) -{ - char line[BUFSIZ + 2]; - int errs = 0; - int lineno = 1; - char *start = buff; - char *end; - int count = 0; - int rule = 0; - int len = strlen(buff) + 1; - - /* parse it */ - do { - count = (len > BUFSIZ-1)?BUFSIZ-1:len; - strncpy(line, start, count); - line[count] = '\0'; - if ((end = strchr(line, '\n'))) { - *(++end) = '\0'; - count = strlen(line); - } else - strcat(line, "\n"); - start += count; - len -= count; - if (parse_line(line, &rule, lineno)) - errs++; - lineno++; - } while (len > 0); - -#ifdef DEBUG_APPRENTICE - kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl; - kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl; -#endif - -#ifdef MIME_MAGIC_DEBUG_TABLE - test_table(); -#endif - - return (errs ? -1 : 0); -} - -/* - * extend the sign bit if the comparison is to be signed - */ -static unsigned long -signextend(struct magic *m, unsigned long v) -{ - if (!(m->flag & UNSIGNED)) - switch (m->type) { - /* - * Do not remove the casts below. They are vital. - * When later compared with the data, the sign - * extension must have happened. - */ - case BYTE: - v = (char) v; - break; - case SHORT: - case BESHORT: - case LESHORT: - v = (short) v; - break; - case DATE: - case BEDATE: - case LEDATE: - case LONG: - case BELONG: - case LELONG: - v = (long) v; - break; - case STRING: - break; - default: - kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl; - return 998; //good value - } - return v; -} - -/* - * parse one line from magic file, put into magic[index++] if valid - */ -int KMimeMagic::parse(char *l, int -#ifdef DEBUG_LINENUMBERS - lineno -#endif - ) -{ - int i = 0; - struct magic *m; - char *t, - *s; - /* allocate magic structure entry */ - if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) { - kdError(7018) << "parse: Out of memory." << endl; - return -1; - } - /* append to linked list */ - m->next = NULL; - if (!conf->magic || !conf->last) { - conf->magic = conf->last = m; - } else { - conf->last->next = m; - conf->last = m; - } - - /* set values in magic structure */ - m->flag = 0; - m->cont_level = 0; -#ifdef DEBUG_LINENUMBERS - m->lineno = lineno; -#endif - - while (*l == '>') { - ++l; /* step over */ - m->cont_level++; - } - - if (m->cont_level != 0 && *l == '(') { - ++l; /* step over */ - m->flag |= INDIR; - } - /* get offset, then skip over it */ - m->offset = (int) strtol(l, &t, 0); - if (l == t) { - kdError(7018) << "parse: offset " << l << " invalid" << endl; - } - l = t; - - if (m->flag & INDIR) { - m->in.type = LONG; - m->in.offset = 0; - /* - * read [.lbs][+-]nnnnn) - */ - if (*l == '.') { - switch (*++l) { - case 'l': - m->in.type = LONG; - break; - case 's': - m->in.type = SHORT; - break; - case 'b': - m->in.type = BYTE; - break; - default: - kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl; - break; +class KMimeMagicUtimeConf { + public: + KMimeMagicUtimeConf() { + tmpDirs << TQString::fromLatin1("/tmp"); // default value + + // The trick is that we also don't want the user to override globally set + // directories. So we have to misuse TDEStandardDirs :} + TQStringList confDirs = TDEGlobal::dirs()->resourceDirs( "config" ); + if ( !confDirs.isEmpty() ) { + TQString globalConf = confDirs.last() + "kmimemagicrc"; + if ( TQFile::exists( globalConf ) ) { + KSimpleConfig cfg( globalConf ); + cfg.setGroup( "Settings" ); + tmpDirs = cfg.readListEntry( "atimeDirs" ); + } + if ( confDirs.count() > 1 ) { + TQString localConf = confDirs.first() + "kmimemagicrc"; + if ( TQFile::exists( localConf ) ) { + KSimpleConfig cfg( localConf ); + cfg.setGroup( "Settings" ); + tmpDirs += cfg.readListEntry( "atimeDirs" ); + } + } + for ( TQStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it ) { + TQString dir = *it; + if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' ) { + (*it) += '/'; + } + } } - l++; + #if 0 + // debug code + for ( TQStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it ) { + kdDebug(7018) << " atimeDir: " << *it << endl; + } + #endif } - s = l; - if (*l == '+' || *l == '-') - l++; - if (isdigit((unsigned char) *l)) { - m->in.offset = strtol(l, &t, 0); - if (*s == '-') - m->in.offset = -m->in.offset; - } else - t = l; - if (*t++ != ')') { - kdError(7018) << "parse: missing ')' in indirect offset" << endl; + + bool restoreAccessTime( const TQString & file ) const { + TQString dir = file.left( file.findRev( '/' ) ); + bool res = tmpDirs.contains( dir ); + //kdDebug(7018) << "restoreAccessTime " << file << " dir=" << dir << " result=" << res << endl; + return res; } - l = t; - } - while (isascii((unsigned char) *l) && isdigit((unsigned char) *l)) - ++l; - EATAB; - -#define NBYTE 4 -#define NSHORT 5 -#define NLONG 4 -#define NSTRING 6 -#define NDATE 4 -#define NBESHORT 7 -#define NBELONG 6 -#define NBEDATE 6 -#define NLESHORT 7 -#define NLELONG 6 -#define NLEDATE 6 + TQStringList tmpDirs; +}; - if (*l == 'u') { - ++l; - m->flag |= UNSIGNED; +TQString fixupMagicOutput(TQString &mime) { + if (mime == "inode/x-empty") { + return MIME_BINARY_ZEROSIZE; } - /* get type, skip it */ - if (strncmp(l, "byte", NBYTE) == 0) { - m->type = BYTE; - l += NBYTE; - } else if (strncmp(l, "short", NSHORT) == 0) { - m->type = SHORT; - l += NSHORT; - } else if (strncmp(l, "long", NLONG) == 0) { - m->type = LONG; - l += NLONG; - } else if (strncmp(l, "string", NSTRING) == 0) { - m->type = STRING; - l += NSTRING; - } else if (strncmp(l, "date", NDATE) == 0) { - m->type = DATE; - l += NDATE; - } else if (strncmp(l, "beshort", NBESHORT) == 0) { - m->type = BESHORT; - l += NBESHORT; - } else if (strncmp(l, "belong", NBELONG) == 0) { - m->type = BELONG; - l += NBELONG; - } else if (strncmp(l, "bedate", NBEDATE) == 0) { - m->type = BEDATE; - l += NBEDATE; - } else if (strncmp(l, "leshort", NLESHORT) == 0) { - m->type = LESHORT; - l += NLESHORT; - } else if (strncmp(l, "lelong", NLELONG) == 0) { - m->type = LELONG; - l += NLELONG; - } else if (strncmp(l, "ledate", NLEDATE) == 0) { - m->type = LEDATE; - l += NLEDATE; - } else { - kdError(7018) << "parse: type " << l << " invalid" << endl; - return -1; + else if (mime.contains("no read permission")) { + return MIME_BINARY_UNREADABLE; } - /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ - if (*l == '&') { - ++l; - m->mask = signextend(m, strtol(l, &l, 0)); - } else - m->mask = (unsigned long) ~0L; - EATAB; - - switch (*l) { - case '>': - case '<': - /* Old-style anding: "0 byte &0x80 dynamically linked" */ - case '&': - case '^': - case '=': - m->reln = *l; - ++l; - break; - case '!': - if (m->type != STRING) { - m->reln = *l; - ++l; - break; - } - /* FALL THROUGH */ - default: - if (*l == 'x' && isascii((unsigned char) l[1]) && - isspace((unsigned char) l[1])) { - m->reln = *l; - ++l; - goto GetDesc; /* Bill The Cat */ - } - m->reln = '='; - break; + else { + return mime; } - EATAB; - - if (getvalue(m, &l)) - return -1; - /* - * now get last part - the description - */ - GetDesc: - EATAB; - if (l[0] == '\b') { - ++l; - m->nospflag = 1; - } else if ((l[0] == '\\') && (l[1] == 'b')) { - ++l; - ++l; - m->nospflag = 1; - } else - m->nospflag = 0; - // Copy description - until EOL or '#' (for comments) - while (*l != '\0' && *l != '#' && i < MAXDESC-1) - m->desc[i++] = *l++; - m->desc[i] = '\0'; - // Remove trailing spaces - while (--i>0 && isspace( m->desc[i] )) - m->desc[i] = '\0'; - - // old code - //while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC) /* NULLBODY */ ; - -#ifdef DEBUG_APPRENTICE - kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl; -#endif - return 0; -} - -/* - * Read a numeric value from a pointer, into the value union of a magic - * pointer, according to the magic type. Update the string pointer to point - * just after the number read. Return 0 for success, non-zero for failure. - */ -static int -getvalue(struct magic *m, char **p) -{ - int slen; - - if (m->type == STRING) { - *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen); - m->vallen = slen; - } else if (m->reln != 'x') - m->value.l = signextend(m, strtol(*p, p, 0)); - return 0; } -/* - * Convert a string containing C character escapes. Stop at an unescaped - * space or tab. Copy the converted version to "p", returning its length in - * *slen. Return updated scan pointer as function result. - */ -static char * -getstr(register char *s, register char *p, int plen, int *slen) -{ - char *origs = s, - *origp = p; - char *pmax = p + plen - 1; - register int c; - register int val; - - while ((c = *s++) != '\0') { - if (isspace((unsigned char) c)) - break; - if (p >= pmax) { - kdError(7018) << "String too long: " << origs << endl; - break; - } - if (c == '\\') { - switch (c = *s++) { - - case '\0': - goto out; - - default: - *p++ = (char) c; - break; - - case 'n': - *p++ = '\n'; - break; - - case 'r': - *p++ = '\r'; - break; - - case 'b': - *p++ = '\b'; - break; - - case 't': - *p++ = '\t'; - break; - - case 'f': - *p++ = '\f'; - break; - - case 'v': - *p++ = '\v'; - break; - - /* \ and up to 3 octal digits */ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - val = c - '0'; - c = *s++; /* try for 2 */ - if (c >= '0' && c <= '7') { - val = (val << 3) | (c - '0'); - c = *s++; /* try for 3 */ - if (c >= '0' && c <= '7') - val = (val << 3) | (c - '0'); - else - --s; - } else - --s; - *p++ = (char) val; - break; - - /* \x and up to 3 hex digits */ - case 'x': - val = 'x'; /* Default if no digits */ - c = hextoint(*s++); /* Get next char */ - if (c >= 0) { - val = c; - c = hextoint(*s++); - if (c >= 0) { - val = (val << 4) + c; - c = hextoint(*s++); - if (c >= 0) { - val = (val << 4) + c; - } else - --s; - } else - --s; - } else - --s; - *p++ = (char) val; - break; - } - } else - *p++ = (char) c; - } - out: - *p = '\0'; - *slen = p - origp; - //for ( char* foo = origp; foo < p ; ++foo ) - // kdDebug(7018) << " " << *foo << endl; - return s; -} +/* current config */ +struct config_rec { + bool followLinks; + TQString resultBuf; + int accuracy; + magic_t magic; + TQStringList databases; -/* Single hex char to int; -1 if not a hex char. */ -static int -hextoint(int c) -{ - if (!isascii((unsigned char) c)) - return -1; - if (isdigit((unsigned char) c)) - return c - '0'; - if ((c >= 'a') && (c <= 'f')) - return c + 10 - 'a'; - if ((c >= 'A') && (c <= 'F')) - return c + 10 - 'A'; - return -1; -} + KMimeMagicUtimeConf * utimeConf; +}; /* - * Convert the byte order of the data we are looking at + * apprentice - load configuration from the magic file. */ -static int -mconvert(union VALUETYPE *p, struct magic *m) -{ - switch (m->type) { - case BYTE: - return 1; - case STRING: - /* Null terminate */ - p->s[sizeof(p->s) - 1] = '\0'; - return 1; -#ifndef WORDS_BIGENDIAN - case SHORT: -#endif - case BESHORT: - p->h = (short) ((p->hs[0] << 8) | (p->hs[1])); - return 1; -#ifndef WORDS_BIGENDIAN - case LONG: - case DATE: -#endif - case BELONG: - case BEDATE: - p->l = (long) - ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3])); - return 1; -#ifdef WORDS_BIGENDIAN - case SHORT: -#endif - case LESHORT: - p->h = (short) ((p->hs[1] << 8) | (p->hs[0])); - return 1; -#ifdef WORDS_BIGENDIAN - case LONG: - case DATE: -#endif - case LELONG: - case LEDATE: - p->l = (long) - ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0])); - return 1; - default: - kdError(7018) << "mconvert: invalid type " << m->type << endl; - return 0; - } -} - - -static int -mget(union VALUETYPE *p, unsigned char *s, struct magic *m, - int nbytes) -{ - long offset = m->offset; - switch ( m->type ) - { - case BYTE: - if ( offset + 1 > nbytes-1 ) // nbytes = (size of file) + 1 - return 0; - break; - case SHORT: - case BESHORT: - case LESHORT: - if ( offset + 2 > nbytes-1 ) - return 0; - break; - case LONG: - case BELONG: - case LELONG: - case DATE: - case BEDATE: - case LEDATE: - if ( offset + 4 > nbytes-1 ) - return 0; - break; - case STRING: - break; - } - -// The file length might be < sizeof(union VALUETYPE) (David) -// -> pad with zeros (the 'file' command does it this way) -// Thanks to Stan Covington <stan@calderasystems.com> for detailed report - if (offset + (int)sizeof(union VALUETYPE) > nbytes) - { - int have = nbytes - offset; - memset(p, 0, sizeof(union VALUETYPE)); - if (have > 0) - memcpy(p, s + offset, have); - } else - memcpy(p, s + offset, sizeof(union VALUETYPE)); - - if (!mconvert(p, m)) - return 0; - - if (m->flag & INDIR) { - - switch (m->in.type) { - case BYTE: - offset = p->b + m->in.offset; - break; - case SHORT: - offset = p->h + m->in.offset; - break; - case LONG: - offset = p->l + m->in.offset; - break; - } - - if (offset + (int)sizeof(union VALUETYPE) > nbytes) - return 0; - - memcpy(p, s + offset, sizeof(union VALUETYPE)); - - if (!mconvert(p, m)) - return 0; - } - return 1; -} - -static int -mcheck(union VALUETYPE *p, struct magic *m) -{ - register unsigned long l = m->value.l; - register unsigned long v; - int matched; - - if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) { - kdError(7018) << "BOINK" << endl; - return 1; - } - switch (m->type) { - case BYTE: - v = p->b; - break; - - case SHORT: - case BESHORT: - case LESHORT: - v = p->h; - break; - - case LONG: - case BELONG: - case LELONG: - case DATE: - case BEDATE: - case LEDATE: - v = p->l; - break; - - case STRING: - l = 0; - /* - * What we want here is: v = strncmp(m->value.s, p->s, - * m->vallen); but ignoring any nulls. bcmp doesn't give - * -/+/0 and isn't universally available anyway. - */ - v = 0; - { - register unsigned char *a = (unsigned char *) m->value.s; - register unsigned char *b = (unsigned char *) p->s; - register int len = m->vallen; - Q_ASSERT(len); - - while (--len >= 0) - if ((v = *b++ - *a++) != 0) - break; - } - break; - default: - kdError(7018) << "mcheck: invalid type " << m->type << endl; - return 0; /* NOTREACHED */ - } -#if 0 - tqDebug("Before signextend %08x", v); -#endif - v = signextend(m, v) & m->mask; -#if 0 - tqDebug("After signextend %08x", v); -#endif - - switch (m->reln) { - case 'x': - matched = 1; - break; - - case '!': - matched = v != l; - break; - - case '=': - matched = v == l; - break; - - case '>': - if (m->flag & UNSIGNED) - matched = v > l; - else - matched = (long) v > (long) l; - break; - - case '<': - if (m->flag & UNSIGNED) - matched = v < l; - else - matched = (long) v < (long) l; - break; - - case '&': - matched = (v & l) == l; - break; - - case '^': - matched = (v & l) != l; - break; - - default: - matched = 0; - kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl; - break; /* NOTREACHED */ - } - - return matched; +int KMimeMagic::apprentice( const TQString& magicfile ) { + conf->databases.clear(); + conf->databases.append(magicfile); + return magic_load(conf->magic, conf->databases[0].latin1()); } /* @@ -1351,867 +169,121 @@ mcheck(union VALUETYPE *p, struct magic *m) * fixed-size buffer to begin processing the contents. */ -void process(struct config_rec* conf, const TQString & fn) -{ - int fd = 0; - unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ +void process(struct config_rec* conf, const TQString & fn) { KDE_struct_stat sb; - int nbytes = 0; /* number of bytes read from a datafile */ - int tagbytes = 0; /* size of prefixed tag */ TQCString fileName = TQFile::encodeName( fn ); - /* - * first try judging the file based on its filesystem status - */ - if (fsmagic(conf, fileName, &sb) != 0) { - //resultBuf += "\n"; - return; - } - if ((fd = KDE_open(fileName, O_RDONLY)) < 0) { - /* We can't open it, but we were able to stat it. */ - /* - * if (sb.st_mode & 0002) addResult("writable, "); - * if (sb.st_mode & 0111) addResult("executable, "); - */ - //kdDebug(7018) << "can't read `" << fn << "' (" << strerror(errno) << ")." << endl; - conf->resultBuf = MIME_BINARY_UNREADABLE; - return; - } - /* - * try looking at the first HOWMANY bytes - */ - if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) { - kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl; - conf->resultBuf = MIME_BINARY_UNREADABLE; - (void)close(fd); - return; - } - if ((tagbytes = tagmagic(buf, nbytes))) { - // Read buffer at new position - lseek(fd, tagbytes, SEEK_SET); - nbytes = read(fd, (char*)buf, HOWMANY); - if (nbytes < 0) { - conf->resultBuf = MIME_BINARY_UNREADABLE; - (void)close(fd); - return; - } - } - if (nbytes == 0) { - conf->resultBuf = MIME_BINARY_ZEROSIZE; - } else { - buf[nbytes++] = '\0'; /* null-terminate it */ - tryit(conf, buf, nbytes); - } - - if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) ) - { - /* - * Try to restore access, modification times if read it. - * This changes the "change" time (ctime), but we can't do anything - * about that. - */ - struct utimbuf utbuf; - utbuf.actime = sb.st_atime; - utbuf.modtime = sb.st_mtime; - (void) utime(fileName, &utbuf); - } - (void) close(fd); -} - - -static void tryit(struct config_rec* conf, unsigned char *buf, int nb) -{ - /* try tests in /etc/magic (or surrogate magic file) */ - if (match(conf, buf, nb)) - return; - - /* try known keywords, check for ascii-ness too. */ - if (ascmagic(conf, buf, nb) == 1) - return; - - /* see if it's plain text */ - if (textmagic(conf, buf, nb)) - return; - - /* abandon hope, all ye who remain here */ - conf->resultBuf = MIME_BINARY_UNKNOWN; - conf->accuracy = 0; -} - -static int -fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb) -{ - int ret = 0; - - /* - * Fstat is cheaper but fails for files you don't have read perms on. - * On 4.2BSD and similar systems, use lstat() to identify symlinks. - */ - ret = KDE_lstat(fn, sb); /* don't merge into if; see "ret =" above */ - - if (ret) { - return 1; - - } - /* - * if (sb->st_mode & S_ISUID) resultBuf += "setuid "; - * if (sb->st_mode & S_ISGID) resultBuf += "setgid "; - * if (sb->st_mode & S_ISVTX) resultBuf += "sticky "; - */ - - switch (sb->st_mode & S_IFMT) { - case S_IFDIR: - conf->resultBuf = MIME_INODE_DIR; - return 1; - case S_IFCHR: - conf->resultBuf = MIME_INODE_CDEV; - return 1; - case S_IFBLK: - conf->resultBuf = MIME_INODE_BDEV; - return 1; - /* TODO add code to handle V7 MUX and Blit MUX files */ -#ifdef S_IFIFO - case S_IFIFO: - conf->resultBuf = MIME_INODE_FIFO; - return 1; -#endif -#ifdef S_IFLNK - case S_IFLNK: - { - char buf[BUFSIZ + BUFSIZ + 4]; - register int nch; - KDE_struct_stat tstatbuf; - - if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) { - conf->resultBuf = MIME_INODE_LINK; - //conf->resultBuf += "\nunreadable"; - return 1; - } - buf[nch] = '\0'; /* readlink(2) forgets this */ - /* If broken symlink, say so and quit early. */ - if (*buf == '/') { - if (KDE_stat(buf, &tstatbuf) < 0) { - conf->resultBuf = MIME_INODE_LINK; - //conf->resultBuf += "\nbroken"; - return 1; - } - } else { - char *tmp; - char buf2[BUFSIZ + BUFSIZ + 4]; - - strncpy(buf2, fn, BUFSIZ); - buf2[BUFSIZ] = 0; - - if ((tmp = strrchr(buf2, '/')) == NULL) { - tmp = buf; /* in current dir */ - } else { - /* dir part plus (rel.) link */ - *++tmp = '\0'; - strcat(buf2, buf); - tmp = buf2; - } - if (KDE_stat(tmp, &tstatbuf) < 0) { - conf->resultBuf = MIME_INODE_LINK; - //conf->resultBuf += "\nbroken"; - return 1; - } else - strcpy(buf, tmp); - } - if (conf->followLinks) - process( conf, TQFile::decodeName( buf ) ); - else - conf->resultBuf = MIME_INODE_LINK; - return 1; - } - return 1; -#endif -#ifdef S_IFSOCK -#ifndef __COHERENT__ - case S_IFSOCK: - conf->resultBuf = MIME_INODE_SOCK; - return 1; -#endif -#endif - case S_IFREG: - break; - default: - kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl; - /* NOTREACHED */ - } - - /* - * regular file, check next possibility - */ - if (sb->st_size == 0) { - conf->resultBuf = MIME_BINARY_ZEROSIZE; - return 1; - } - return 0; -} - -/* - * Go through the whole list, stopping if you find a match. Process all the - * continuations of that match before returning. - * - * We support multi-level continuations: - * - * At any time when processing a successful top-level match, there is a current - * continuation level; it represents the level of the last successfully - * matched continuation. - * - * Continuations above that level are skipped as, if we see one, it means that - * the continuation that controls them - i.e, the lower-level continuation - * preceding them - failed to match. - * - * Continuations below that level are processed as, if we see one, it means - * we've finished processing or skipping higher-level continuations under the - * control of a successful or unsuccessful lower-level continuation, and are - * now seeing the next lower-level continuation and should process it. The - * current continuation level reverts to the level of the one we're seeing. - * - * Continuations at the current level are processed as, if we see one, there's - * no lower-level continuation that may have failed. - * - * If a continuation matches, we bump the current continuation level so that - * higher-level continuations are processed. - */ -static int -match(struct config_rec* conf, unsigned char *s, int nbytes) -{ - int cont_level = 0; - union VALUETYPE p; - struct magic *m; - -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl; - for (m = conf->magic; m; m = m->next) { - if (isprint((((unsigned long) m) >> 24) & 255) && - isprint((((unsigned long) m) >> 16) & 255) && - isprint((((unsigned long) m) >> 8) & 255) && - isprint(((unsigned long) m) & 255)) { - kdDebug(7018) << "match: POINTER CLOBBERED! " << endl; - break; - } + int magic_flags = MAGIC_CONTINUE|MAGIC_ERROR|MAGIC_MIME_TYPE/*|MAGIC_DEBUG*/; + if (conf->followLinks) { + magic_flags |= MAGIC_SYMLINK; } -#endif - - for (m = conf->magic; m; m = m->next) { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl; -#endif - memset(&p, 0, sizeof(union VALUETYPE)); - - /* check if main entry matches */ - if (!mget(&p, s, m, nbytes) || - !mcheck(&p, m)) { - struct magic *m_cont; - - /* - * main entry didn't match, flush its continuations - */ - if (!m->next || (m->next->cont_level == 0)) { - continue; - } - m_cont = m->next; - while (m_cont && (m_cont->cont_level != 0)) { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl; -#endif - /* - * this trick allows us to keep *m in sync - * when the continue advances the pointer - */ - m = m_cont; - m_cont = m_cont->next; - } - continue; - } - /* if we get here, the main entry rule was a match */ - /* this will be the last run through the loop */ -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl; -#endif + magic_setflags(conf->magic, magic_flags); + conf->resultBuf = TQString(magic_file(conf->magic, fileName)); + conf->resultBuf = fixupMagicOutput(conf->resultBuf); - /* remember the match */ - conf->resultBuf = m->desc; - - cont_level++; + if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) ) { /* - * while (m && m->next && m->next->cont_level != 0 && ( m = - * m->next )) - */ - m = m->next; - while (m && (m->cont_level != 0)) { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl; -#endif - if (cont_level >= m->cont_level) { - if (cont_level > m->cont_level) { - /* - * We're at the end of the level - * "cont_level" continuations. - */ - cont_level = m->cont_level; - } - if (mget(&p, s, m, nbytes) && - mcheck(&p, m)) { - /* - * This continuation matched. Print - * its message, with a blank before - * it if the previous item printed - * and this item isn't empty. - */ -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "continuation matched" << endl; -#endif - conf->resultBuf = m->desc; - cont_level++; - } - } - /* move to next continuation record */ - m = m->next; + * Try to restore access, modification times if read it. + * This changes the "change" time (ctime), but we can't do anything + * about that. + */ + struct utimbuf utbuf; + utbuf.actime = sb.st_atime; + utbuf.modtime = sb.st_mtime; + (void) utime(fileName, &utbuf); + } +} + +KMimeMagic::KMimeMagic() { + // Magic file detection init + TQString mimefile = locate( "mime", "magic" ); + init( mimefile ); + // Add snippets from share/config/magic/* + TQStringList snippets = TDEGlobal::dirs()->findAllResources( "config", "magic/*.magic", true ); + for ( TQStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it ) { + if ( !mergeConfig( *it ) ) { + kdWarning() << k_funcinfo << "Failed to parse " << *it << endl; } - // KDE-specific: need an actual mimetype for a real match - // If we only matched a rule with continuations but no mimetype, it's not a match - if ( !conf->resultBuf.isEmpty() ) - { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: matched" << endl; -#endif - return 1; /* all through */ - } } -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: failed" << endl; -#endif - return 0; /* no match at all */ } -// Try to parse prefixed tags before matching on content -// Sofar only ID3v2 tags (<=.4) are handled -static int tagmagic(unsigned char *buf, int nbytes) -{ - if(nbytes<40) return 0; - if(buf[0] == 'I' && buf[1] == 'D' && buf[2] == '3') { - int size = 10; - // Sanity (known version, no unknown flags) - if(buf[3] > 4) return 0; - if(buf[5] & 0x0F) return 0; - // Tag has v4 footer - if(buf[5] & 0x10) size += 10; - // Calculated syncsafe size - size += buf[9]; - size += buf[8] << 7; - size += buf[7] << 14; - size += buf[6] << 21; - return size; - } - return 0; -} - -struct Token { - char *data; - int length; -}; - -struct Tokenizer -{ - Tokenizer(char* buf, int nbytes) { - data = buf; - length = nbytes; - pos = 0; - } - bool isNewLine() { - return newline; - } - Token* nextToken() { - if (pos == 0) - newline = true; - else - newline = false; - token.data = data+pos; - token.length = 0; - while(pos<length) { - switch (data[pos]) { - case '\n': - newline = true; - case '\0': - case '\t': - case ' ': - case '\r': - case '\f': - case ',': - case ';': - case '>': - if (token.length == 0) token.data++; - else - return &token; - break; - default: - token.length++; - } - pos++; - } - return &token; - } - -private: - Token token; - char* data; - int length; - int pos; - bool newline; -}; - - -/* an optimization over plain strcmp() */ -//#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0) -static inline bool STREQ(const Token *token, const char *b) { - const char *a = token->data; - int len = token->length; - if (a == b) return true; - while(*a && *b && len > 0) { - if (*a != *b) return false; - a++; b++; len--; - } - return (len == 0 && *b == 0); -} - -static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes) -{ - int i; - double pct, maxpct, pctsum; - double pcts[NTYPES]; - int mostaccurate, tokencount; - int typeset, jonly, conly, jconly, objconly, cpponly; - int has_escapes = 0; - //unsigned char *s; - //char nbuf[HOWMANY + 1]; /* one extra for terminating '\0' */ - - /* these are easy, do them first */ - conf->accuracy = 70; - - /* - * for troff, look for . + letter + letter or .\"; this must be done - * to disambiguate tar archives' ./file and other trash from real - * troff input. - */ - if (*buf == '.') { - unsigned char *tp = buf + 1; - - while (isascii(*tp) && isspace(*tp)) - ++tp; /* skip leading whitespace */ - if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') && - isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) { - conf->resultBuf = MIME_APPL_TROFF; - return 1; - } - } - if ((*buf == 'c' || *buf == 'C') && - isascii(*(buf + 1)) && isspace(*(buf + 1))) { - /* Fortran */ - conf->resultBuf = MIME_TEXT_FORTRAN; - return 1; - } - assert(nbytes-1 < HOWMANY + 1); - /* look for tokens - this is expensive! */ - has_escapes = (memchr(buf, '\033', nbytes) != NULL); - Tokenizer tokenizer((char*)buf, nbytes); - const Token* token; - bool linecomment = false, blockcomment = false; - const struct names *p; - int typecount[NTYPES]; -/* - * Fritz: - * Try a little harder on C/C++/Java. - */ - memset(&typecount, 0, sizeof(typecount)); - typeset = 0; - jonly = 0; - conly = 0; - jconly = 0; - objconly = 0; - cpponly = 0; - tokencount = 0; - bool foundClass = false; // mandatory for java - // first collect all possible types and count matches - // we stop at '>' too, because of "<title>blah</title>" on HTML pages - while ((token = tokenizer.nextToken())->length > 0) { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl; -#endif - if (linecomment && tokenizer.isNewLine()) - linecomment = false; - if (blockcomment && STREQ(token, "*/")) { - blockcomment = false; - continue; - } - for (p = names; p->name ; p++) { - if (STREQ(token, p->name)) { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl; -#endif - tokencount++; - typeset |= p->type; - if(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) { - if (linecomment || blockcomment) { - continue; - } - else { - switch(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) - { - case L_JAVA: - jonly++; - break; - case L_OBJC: - objconly++; - break; - case L_CPP: - cpponly++; - break; - case (L_CPP|L_JAVA): - jconly++; - if ( !foundClass && STREQ(token, "class") ) - foundClass = true; - break; - case (L_C|L_CPP): - conly++; - break; - default: - if (STREQ(token, "//")) linecomment = true; - if (STREQ(token, "/*")) blockcomment = true; - } - } - } - for (i = 0; i < (int)NTYPES; i++) { - if ((1 << i) & p->type) typecount[i]+= p->type & FLAG_STRONG ? 2 : 1; - } - } - } - } - - if (typeset & (L_C|L_CPP|L_JAVA|L_OBJC)) { - conf->accuracy = 60; - if (!(typeset & ~(L_C|L_CPP|L_JAVA|L_OBJC))) { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "C/C++/Java/ObjC: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " objconly=" << objconly << endl; -#endif - if (jonly > 1 && foundClass) { - // At least two java-only tokens have matched, including "class" - conf->resultBuf = TQString(types[P_JAVA].type); - return 1; - } - if (jconly > 1) { - // At least two non-C (only C++ or Java) token have matched. - if (typecount[P_JAVA] < typecount[P_CPP]) - conf->resultBuf = TQString(types[P_CPP].type); - else - conf->resultBuf = TQString(types[P_JAVA].type); - return 1; - } - if (conly + cpponly > 1) { - // Either C or C++. - if (cpponly > 0) - conf->resultBuf = TQString(types[P_CPP].type); - else - conf->resultBuf = TQString(types[P_C].type); - return 1; - } - if (objconly > 0) { - conf->resultBuf = TQString(types[P_OBJC].type); - return 1; - } - } - } - - /* Neither C, C++ or Java (or all of them without able to distinguish): - * Simply take the token-class with the highest - * matchcount > 0 - */ - mostaccurate = -1; - maxpct = pctsum = 0.0; - for (i = 0; i < (int)NTYPES; i++) { - if (typecount[i] > 1) { // one word is not enough, we need at least two - pct = (double)typecount[i] / (double)types[i].kwords * - (double)types[i].weight; - pcts[i] = pct; - pctsum += pct; - if (pct > maxpct) { - maxpct = pct; - mostaccurate = i; - } -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl; -#endif - } - } - if (mostaccurate >= 0) { - if ( mostaccurate != P_JAVA || foundClass ) // 'class' mandatory for java - { - conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60); -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << conf->accuracy << endl; -#endif - conf->resultBuf = TQString(types[mostaccurate].type); - return 1; - } - } - - switch (is_tar(buf, nbytes)) { - case 1: - /* V7 tar archive */ - conf->resultBuf = MIME_APPL_TAR; - conf->accuracy = 90; - return 1; - case 2: - /* POSIX tar archive */ - conf->resultBuf = MIME_APPL_TAR; - conf->accuracy = 90; - return 1; - } - - for (i = 0; i < nbytes; i++) { - if (!isascii(*(buf + i))) - return 0; /* not all ascii */ - } - - /* all else fails, but it is ascii... */ - conf->accuracy = 90; - if (has_escapes) { - /* text with escape sequences */ - /* we leave this open for further differentiation later */ - conf->resultBuf = MIME_TEXT_UNKNOWN; - } else { - /* plain text */ - conf->resultBuf = MIME_TEXT_PLAIN; - } - return 1; -} - - -/* This code is taken from the "file" command, where it is licensed - * in the "beer-ware license" :-) - * Original author: <joerg@FreeBSD.ORG> - * Simplified by David Faure to avoid the static array char[256]. - * Drastically simplified by Laurent Dard for the Trinity Desktop Environment - * Configuration files with big lines are still text files: - * line length checking is now avoided here. - */ -static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes) -{ - int i; - unsigned char *cp; - - nbytes--; - - /* Look whether there are "unreasonable" characters. */ - for (i = 0, cp = buf; i < nbytes; i++, cp++) - if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F)) - return 0; - - conf->resultBuf = MIME_TEXT_PLAIN; - return 1; +KMimeMagic::KMimeMagic(const TQString & _configfile) { + init( _configfile ); } - -/* - * is_tar() -- figure out whether file is a tar archive. - * - * Stolen (by author of file utility) from the public domain tar program: Public - * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu). - * - * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7 - * 1997/06/24 00:41:02 ikluft Exp ikluft $ - * - * Comments changed and some code/comments reformatted for file command by Ian - * Darwin. - */ - -#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') ) - -/* - * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for - * old UNIX tar file, 2 for Unix Std (POSIX) tar file. - */ - -static int -is_tar(unsigned char *buf, int nbytes) -{ - register union record *header = (union record *) buf; - register int i; - register long sum, - recsum; - register char *p; - - if (nbytes < (int)sizeof(union record)) - return 0; - - recsum = from_oct(8, header->header.chksum); - - sum = 0; - p = header->charptr; - for (i = sizeof(union record); --i >= 0;) { - /* - * We can't use unsigned char here because of old compilers, - * e.g. V7. - */ - sum += 0xFF & *p++; - } - - /* Adjust checksum to count the "chksum" field as blanks. */ - for (i = sizeof(header->header.chksum); --i >= 0;) - sum -= 0xFF & header->header.chksum[i]; - sum += ' ' * sizeof header->header.chksum; - - if (sum != recsum) - return 0; /* Not a tar archive */ - - if (0 == strcmp(header->header.magic, TMAGIC)) - return 2; /* Unix Standard tar archive */ - - return 1; /* Old fashioned tar archive */ -} - - -/* - * Quick and dirty octal conversion. - * - * Result is -1 if the field is invalid (all blank, or nonoctal). - */ -static long -from_oct(int digs, char *where) -{ - register long value; - - while (isspace(*where)) { /* Skip spaces */ - where++; - if (--digs <= 0) - return -1; /* All blank field */ - } - value = 0; - while (digs > 0 && isodigit(*where)) { /* Scan til nonoctal */ - value = (value << 3) | (*where++ - '0'); - --digs; - } - - if (digs > 0 && *where && !isspace(*where)) - return -1; /* Ended on non-space/nul */ - - return value; -} - -KMimeMagic::KMimeMagic() -{ - // Magic file detection init - TQString mimefile = locate( "mime", "magic" ); - init( mimefile ); - // Add snippets from share/config/magic/* - TQStringList snippets = TDEGlobal::dirs()->findAllResources( "config", "magic/*.magic", true ); - for ( TQStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it ) - if ( !mergeConfig( *it ) ) - kdWarning() << k_funcinfo << "Failed to parse " << *it << endl; -} - -KMimeMagic::KMimeMagic(const TQString & _configfile) -{ - init( _configfile ); -} - -void KMimeMagic::init( const TQString& _configfile ) -{ +void KMimeMagic::init( const TQString& _configfile ) { int result; conf = new config_rec; - /* set up the magic list (empty) */ - conf->magic = conf->last = NULL; + /* initialize libmagic */ + conf->magic = magic_open(MAGIC_MIME_TYPE); magicResult = NULL; conf->followLinks = false; conf->utimeConf = 0L; // created on demand /* on the first time through we read the magic file */ result = apprentice(_configfile); - if (result == -1) + if (result == -1) { return; -#ifdef MIME_MAGIC_DEBUG_TABLE - test_table(); -#endif + } } /* * The destructor. * Free the magic-table and other resources. */ -KMimeMagic::~KMimeMagic() -{ +KMimeMagic::~KMimeMagic() { if (conf) { - struct magic *p = conf->magic; - struct magic *q; - while (p) { - q = p; - p = p->next; - free(q); - } + magic_close(conf->magic); delete conf->utimeConf; delete conf; } delete magicResult; } -bool -KMimeMagic::mergeConfig(const TQString & _configfile) -{ - kdDebug(7018) << k_funcinfo << _configfile << endl; - int result; - - if (_configfile.isEmpty()) - return false; - result = apprentice(_configfile); - if (result == -1) { +bool KMimeMagic::mergeConfig(const TQString & _configfile) { + conf->databases.append(_configfile); + TQString merged_databases = conf->databases.join(":"); + if (magic_load(conf->magic, merged_databases.latin1()) == 0) { + return true; + } + else { return false; } -#ifdef MIME_MAGIC_DEBUG_TABLE - test_table(); -#endif - return true; } -void -KMimeMagic::setFollowLinks( bool _enable ) -{ +void KMimeMagic::setFollowLinks( bool _enable ) { conf->followLinks = _enable; } -KMimeMagicResult * -KMimeMagic::findBufferType(const TQByteArray &array) -{ - unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ - +KMimeMagicResult *KMimeMagic::findBufferType(const TQByteArray &array) { conf->resultBuf = TQString::null; - if ( !magicResult ) - magicResult = new KMimeMagicResult(); + if ( !magicResult ) { + magicResult = new KMimeMagicResult(); + } magicResult->setInvalid(); conf->accuracy = 100; int nbytes = array.size(); - - if (nbytes > HOWMANY) - nbytes = HOWMANY; - memcpy(buf, array.data(), nbytes); - if (nbytes == 0) { - conf->resultBuf = MIME_BINARY_ZEROSIZE; - } else { - buf[nbytes++] = '\0'; /* null-terminate it */ - tryit(conf, buf, nbytes); - } - /* if we have any results, put them in the request structure */ + if (nbytes == 0) { + conf->resultBuf = MIME_BINARY_ZEROSIZE; + } + else { + int magic_flags = MAGIC_CONTINUE|MAGIC_ERROR|MAGIC_MIME_TYPE/*|MAGIC_DEBUG*/; + if (conf->followLinks) { + magic_flags |= MAGIC_SYMLINK; + } + magic_setflags(conf->magic, magic_flags); + conf->resultBuf = TQString(magic_buffer(conf->magic, array.data(), nbytes)); + conf->resultBuf = fixupMagicOutput(conf->resultBuf); + } + /* if we have any results, put them in the request structure */ magicResult->setMimeType(conf->resultBuf.stripWhiteSpace()); magicResult->setAccuracy(conf->accuracy); - return magicResult; + return magicResult; } -static void -refineResult(KMimeMagicResult *r, const TQString & _filename) -{ +static void refineResult(KMimeMagicResult *r, const TQString & _filename) { TQString tmp = r->mimeType(); if (tmp.isEmpty()) return; @@ -2246,10 +318,7 @@ refineResult(KMimeMagicResult *r, const TQString & _filename) } } -KMimeMagicResult * -KMimeMagic::findBufferFileType( const TQByteArray &data, - const TQString &fn) -{ +KMimeMagicResult *KMimeMagic::findBufferFileType( const TQByteArray &data, const TQString &fn) { KMimeMagicResult * r = findBufferType( data ); refineResult(r, fn); return r; @@ -2258,28 +327,29 @@ KMimeMagic::findBufferFileType( const TQByteArray &data, /* * Find the content-type of the given file. */ -KMimeMagicResult* KMimeMagic::findFileType(const TQString & fn) -{ +KMimeMagicResult* KMimeMagic::findFileType(const TQString & fn) { #ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl; + kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl; #endif - conf->resultBuf = TQString::null; - - if ( !magicResult ) - magicResult = new KMimeMagicResult(); + conf->resultBuf = TQString::null; + + if ( !magicResult ) { + magicResult = new KMimeMagicResult(); + } magicResult->setInvalid(); conf->accuracy = 100; - - if ( !conf->utimeConf ) - conf->utimeConf = new KMimeMagicUtimeConf(); - - /* process it based on the file contents */ - process(conf, fn ); - - /* if we have any results, put them in the request structure */ - //finishResult(); + + if ( !conf->utimeConf ) { + conf->utimeConf = new KMimeMagicUtimeConf(); + } + + /* process it based on the file contents */ + process(conf, fn ); + + /* if we have any results, put them in the request structure */ + //finishResult(); magicResult->setMimeType(conf->resultBuf.stripWhiteSpace()); magicResult->setAccuracy(conf->accuracy); refineResult(magicResult, fn); - return magicResult; + return magicResult; } diff --git a/tdeio/tdeio/kmimemagic.h b/tdeio/tdeio/kmimemagic.h index d812650f2..1dce2c7af 100644 --- a/tdeio/tdeio/kmimemagic.h +++ b/tdeio/tdeio/kmimemagic.h @@ -43,7 +43,7 @@ class KMimeMagic; // see below (read this one first) * It contains the mimetype and the encoding of * the file or buffer read. */ -class TDEIO_EXPORT_DEPRECATED KMimeMagicResult +class TDEIO_EXPORT KMimeMagicResult { public: KMimeMagicResult() { m_iAccuracy = 100; } @@ -98,7 +98,7 @@ protected: * * The result is contained in the class KMimeMagicResult. */ -class TDEIO_EXPORT_DEPRECATED KMimeMagic +class TDEIO_EXPORT KMimeMagic { public: /** |