diff options
Diffstat (limited to 'indenters/examples/hindent')
-rwxr-xr-x | indenters/examples/hindent | 294 |
1 files changed, 294 insertions, 0 deletions
diff --git a/indenters/examples/hindent b/indenters/examples/hindent new file mode 100755 index 0000000..dc71264 --- /dev/null +++ b/indenters/examples/hindent @@ -0,0 +1,294 @@ +#!/usr/bin/perl +# +# hindent 1.1.2 +# +# Properly indent HTML code and convert tags to uppercase like the Gods intended. +# Understands all nesting tags defined under the HTML 3.2 standard. +# +# by Paul Balyoz <pab@domtools.com> +# +# Usage: +# hindent [-fslcv] [-i num] [file ...] > newfile +# +# Options: +# -f Flow - just prints tags _without_args_, for visual checking. +# NOTE: This option DAMAGES the HTML code. The output is for +# human debugging use ONLY. Keep your original file!! +# -s Strict - prints 1 tag per line with proper indenting. +# Helpful for deciphering HTML code that's all on one line. +# NOTE: This slightly DAMAGES the HTML code because it introduces +# whitespace around tags that had none before, which will mess up +# formatting somewhat on the page (links will have extra spaces, etc). +# -i num Set indentation to this many characters. +# -l List all the tags we recognize and exit. +# -c Lowercase HTML tags. (Uppercase is default) +# -v Print version of hindent and exit. +# +# Copyright (C) 1993-1999 Paul A. Balyoz <pab@domtools.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + + +# How many spaces to indent per level? +# (sets of 8-spaces will be automatically converted to tabs intelligently). +# You can use any value here, some recommendations: 8, 4, 3, or 2 +$spacesperlevel = 2; + +# How many spaces does a "tab" occupy on your screen? +# Unix generally uses 8-space-tabs, but it's user-configurable in most editors. +# If tabs are not turned off (-t0) then we output 1 tab character for every +# $tabstop spaces we need to output. +$tabstop = 8; + +# Tags that require their own end tag <TAG>...</TAG> we will nest them +# properly: (WARNING, you must use lower-case here) +# All other tags (not on this list) will be ignored for indenting purposes. +%nesttag = ( + 'html' => 1, + 'head' => 1, + 'body' => 1, + 'title' => 1, + + 'a' => 1, + + 'table' => 1, + 'tr' => 1, + 'th' => 1, + 'td' => 1, + + 'form' => 1, + 'select' => 1, + 'textarea' => 1, + +# 'p' => 1, Don't do this one because many people use <P> but not </P> + 'ul' => 1, + 'ol' => 1, + 'dl' => 1, + 'blockquote' => 1, + 'center' => 1, + 'div' => 1, + + 'font' => 1, + 'pre' => 1, + 'tt' => 1, + 'i' => 1, + 'b' => 1, + 'u' => 1, + 'strike' => 1, + 'big' => 1, + 'small' => 1, + 'sub' => 1, + 'sup' => 1, + 'em' => 1, + 'strong' => 1, + 'dfn' => 1, + 'code' => 1, + 'samp' => 1, + 'kbd' => 1, + 'var' => 1, + 'cite' => 1, + + 'h1' => 1, + 'h2' => 1, + 'h3' => 1, + 'h4' => 1, + 'h5' => 1, + 'h6' => 1, + + 'applet' => 1, + + 'map' => 1, + + 'frameset' => 1, + 'noframes' => 1, +); + + +#-------------------\ +# END CONFIGURATIONS =================================================================== +#-------------------/ + +use Getopt::Std; + + +# +# Parse args +# + +sub usageexit { + print STDERR "usage: hindent [-fslcv] [-i num] [-t num] [file ...] > newfile\n"; + exit 1; +} + +getopts('fsi:lvt:c') || &usageexit; +if (defined $opt_i) { + if ($opt_i < 0 || $opt_i > 10) { + print STDERR "$0: error: indentation factor '$opt_i' not in range 0..10.\n"; + &usageexit; + } else { + $spacesperlevel = $opt_i; + } +} +if (defined $opt_t) { + if ($opt_t < 0 || $opt_t > 12) { + print STDERR "$0: error: indentation factor '$opt_i' not in range 0..12.\n"; + &usageexit; + } else { + $tabstop = $opt_t; + } +} + + +# +# If -l option, just list tags and exit. +# + +if ($opt_l) { + print "hindent recognizes these HTML tags:\n"; + for $tag (sort(keys(%nesttag))) { + $tag =~ tr/a-z/A-Z/; + print "$tag\n"; + } + exit 0; +} + + +# +# If -v option, just print version and exit. +# + +if ($opt_v) { + print "hindent version 1.1.2\n"; + exit 0; +} + + +# +# Main HTML parsing code +# + +$level = 0; # indentation level +$changelevel = 0; # change in indentation level (delta) +$out = ""; # accumulated output string +while (<>) { + chomp; # some HTML has no newline on last line, chop mangles it. + s/^\s+//; # remove ALL preceding whitespace, we rebuild it ourselves + $line++; + + $end = -1; + $start = $len = 0; + while (/<(.*?)>/g) { + $end = $start+$len-1; # of previous values + $start = length($`); + $len = 1 + length($1) + 1; + ($tag,$arg) = split(/\s+/,$1,2); + if (!$opt_f) { + $out .= substr($_, $end+1, $start-($end+1)); # print stuff from last tag to here + } + if ($opt_c) { + $tag =~ tr/A-Z/a-z/; + } else { + $tag =~ tr/a-z/A-Z/; + } + if ($arg && !$opt_f) { + $out .= "<$tag $arg>"; + } else { + $out .= "<$tag>"; + } + + # if regular tag, push it on stack; if end-tag, pop it off stack. + # but don't do any of this if it's not a special "nesting" tag! + if ($tag !~ m,^/,) { + if ($nesttag{lc($tag)}) { + push @tagstack,$tag; + $changelevel++; # remember how much for later + } + } else { + $tag =~ s,^/,,; # convert this end-tag to a begin-tag + $tag = lc($tag); + if ($nesttag{lc($tag)}) { + # throw away tags until we find a match + if ($#tagstack > -1) { + while ($tag ne lc(pop @tagstack)) { + $changelevel--; # we threw away extra tags + last if $#tagstack <= 0; + } + $changelevel--; # we threw away extra tags + if ($level+$changelevel < 0) { + print STDERR "line $line: saw more end tags than begin ones!\n"; + $changelevel = -$level; + } + } + } + } + &printout if $opt_s; # -s -> print every tag on new line + } + + # + # Print rest of line after the last match, and newline. + # (not part of Flow) + # + if (!$opt_f) { + $end = $start+$len-1; + $out .= substr($_,$end+1,length($_)-($end+1)); + } + + &printout; +} + +# Any tags left on the stack? +if ($level > 0) { + print STDERR "WARNING: level=$level, ", $#tagstack+1," tags left on stack after done parsing! Specifically:\n"; + while ($tag = pop @tagstack) { + print STDERR "\t$tag"; + } +} + +exit 0; + + +# +# Print this line of data indented properly. +# +sub printout { + my($numtabs) = 0; + + # + # To OUTdent, do that BEFORE printing. + # + if ($changelevel < 0) { + $level += $changelevel; + $changelevel = 0; + } + + # + # Print indents and this line of output + # + $spaces = " " x ($level * $spacesperlevel); + $numtabs = int(length($spaces)/$tabstop) if $tabstop; + print "\t" x $numtabs; # print the tabs + print " " x (length($spaces)-$numtabs*$tabstop); # print the spaces + print "$out\n"; + $out = ""; + + # + # To INdent, do that AFTER printing. + # + if ($changelevel > 0) { + $level += $changelevel; + $changelevel = 0; + } +} |