#!/usr/bin/perl -w
# tries to reduce the number of includes in KDE source files
# (c) 2001-2003 Dirk Mueller <mueller@kde.org>

use File::Basename;
use Cwd;

# Forward declarations (with prototypes) of the subroutines used by this
# script, so that calls can be parsed before the definitions are seen.
# NOTE(review): find_src_includes and warn_before_modifying are declared here
# but no definition of either is visible in this file -- confirm whether they
# are leftovers.  find_installed_headers, sighandler_fix_unnecessary and
# check_for_automake_srcdir are defined further down without a declaration
# in this list.
sub find_src_includes($);
sub find_fixable_sources ($);
sub find_fixable_headers($);
sub find_removable_includes ($);
sub warn_before_modifying ($);
sub remove_include ($$$);
sub replace_include ($$$);
sub replace_include_type ($$);
sub fix_duplicates($);
sub fix_compat_includes($);
sub fix_unnecessary($);
sub fix_include_type($);
sub copy_file($$);
sub process_source_file($);
sub extract_gcc_error($);

# Global option flags; they are set by the command-line parsing below and
# read by the fix_* subroutines.
$verbose = 0; # --verbose / -v: print debugging/progress information
$modify = 0;  # --modify / -m: if 1, rewrite the files instead of only reporting
$experimental = 0; # --experimental / -e: trial-and-error removal of includes by recompiling (slow!!)
@explicitfiles = (); # every non-option argument; if non-empty only these files are processed

# Statistics for the experimental mode: number of includes that could be
# removed (build still succeeded) and number that could not.
$exp_success = 0;
$exp_failure = 0;

# Parse the command line.  Options toggle the global flags declared above;
# every argument that does not start with '-' is remembered in
# @explicitfiles as a file to process.  @ARGV is consumed in the process.
# --experimental implies --modify, because trying a build without an include
# requires rewriting the file.
# Unknown options are reported on stderr and otherwise ignored, so that a
# mistyped flag does not silently do nothing.
while (defined (my $arg = shift @ARGV))
{
  if ($arg =~ /^--help$|^-h$/)  {
    print "Usage: fixkdeincludes [--verbose | -v] [--experimental | -e ] [--modify | -m ] [file ...]\n";
    exit 0;
  }
  elsif ($arg =~ /^--verbose$|^-v$/) {
    $verbose = 1;       # Oh is there a problem...?
  }
  elsif ($arg =~ /^--modify$|^-m$/) {
    $modify = 1;
  }
  elsif ($arg =~ /^--experimental$|^-e$/) {
    $modify = 1;
    $experimental = 1;
  }
  elsif ($arg !~ /^-/) {
    push @explicitfiles, $arg;
  }
  else {
    warn "fixkdeincludes: ignoring unknown option '$arg'\n";
  }
}

# Regex alternations (as strings, interpolated into patterns later on) of the
# file name extensions that are treated as C++ sources and as headers.
# The '+' characters are escaped because the strings are used inside regexes.
$cppExt     = "(cpp|cc|cxx|C|c\\+\\+)";
$hExt       = "(h|H|hh|hxx|hpp|h\\+\\+)";

# Map of compatibility headers: key is the old header name as it appears in
# an #include, value is the text that replace_include() puts in its place.
# Long and boring list, scroll down ...
# Values containing ">\n#include <" expand one old header into two new
# #include lines; the surrounding delimiters come from the original line.
# NOTE(review): for an include written with quotes ("qobjcoll.h") that
# expansion would produce mismatched delimiters -- confirm whether this case
# matters.
%compatmap = (
 'qapp.h'        =>  "qapplication.h",
 'qarray.h'      =>  "qmemarray.h",
 'qbitarry.h'    =>  "qbitarray.h",
 'qbttngrp.h'    =>  "qbuttongroup.h",
 'qchkbox.h'     =>  "qcheckbox.h",
 'qclipbrd.h'    =>  "qclipboard.h",
 'qcollect.h'    =>  "qptrcollection.h",
 'qcollection.h' =>  "qptrcollection.h",
 'qcombo.h'      =>  "qcombobox.h",
 'qconnect.h'    =>  "qconnection.h",
 'qdatetm.h'     =>  "qdatetime.h",
 'qdrawutl.h'    =>  "qdrawutil.h",
 'qdstream.h'    =>  "qdatastream.h",
 'qfiledef.h'    =>  "private/qfiledefs_p.h",
 'qfiledlg.h'    =>  "qfiledialog.h",
 'qfileinf.h'    =>  "qfileinfo.h",
 'qfontdta.h'    =>  "qfontdata.h",
 'qfontinf.h'    =>  "qfontinfo.h",
 'qfontmet.h'    =>  "qfontmetrics.h",
 'qgrpbox.h'     =>  "qgroupbox.h",
 'qintcach.h'    =>  "qintcache.h",
 'qiodev.h'      =>  "qiodevice.h",
 'qlcdnum.h'     =>  "qlcdnumber.h",
 'qlined.h'      =>  "qlineedit.h",
 'qlist.h'       =>  "qptrlist.h",
 'qmenudta.h'    =>  "qmenudata.h",
 'qmetaobj.h'    =>  "qmetaobject.h",
 'qmlined.h'     =>  "qtmultilineedit.h",
 'qmsgbox.h'     =>  "qmessagebox.h",
 'qmultilinedit.h' =>  "qmultilineedit.h",
 'qobjcoll.h'    =>  "qobjectlist.h>\n\#include <qobjectdict.h",
 'qobjdefs.h'    =>  "qobjectdefs.h",
 'qpaintd.h'     =>  "qpaintdevice.h",
 'qpaintdc.h'    =>  "qpaintdevicedefs.h",
 'qpdevmet.h'    =>  "qpaintdevicemetrics.h",
 'qpmcache.h'    =>  "qpixmapcache.h",
 'qpntarry.h'    =>  "qpointarray.h",
 'qpopmenu.h'    =>  "qpopupmenu.h",
 'qprndlg.h'     =>  "qprintdialog.h",
 'qprogbar.h'    =>  "qprogressbar.h",
 'qprogdlg.h'    =>  "qprogressdialog.h",
 'qpsprn.h'      =>  "private/qpsprinter_p.h",
 'qpushbt.h'     =>  "qpushbutton.h",
 'qqueue.h'      =>  "qptrqueue.h",
 'qradiobt.h'    =>  "qradiobutton.h",
 'qrangect.h'    =>  "qrangecontrol.h",
 'qscrbar.h'     =>  "qscrollbar.h",
 'qsocknot.h'    =>  "qsocketnotifier.h",
 'qstack.h'      =>  "qptrstack.h",
 'qtabdlg.h'     =>  "qtabdialog.h",
 'qtstream.h'    =>  "qtextstream.h",
 'qvector.h'     =>  "qptrvector.h",
 'qwidcoll.h'    =>  "qwidgetlist.h>\n\#include <qwidgetintdict.h",
 'qwindefs.h'    =>  "qwindowdefs.h",

# KDE specific compatibility headers
 'kapp.h'        =>  "kapplication.h",
 'kstddirs.h'    =>  "kstandarddirs.h",
 'kuniqueapp.h'  =>  "kuniqueapplication.h",
 'ktmainwindow.h'=>  "kmainwindow.h",
 'kcolorbtn.h'   =>  "kcolorbutton.h",
 'kcolordlg.h'   =>  "kcolordialog.h",
 'kxmlgui.h'     =>  "kxmlguifactory.h",
 'kdebugclasses.h' => "kdebug.h",
);


# now it starts to get interesting again

# Collect the C++ source files that live directly in a directory.
#
# Parameter: $dir - directory to scan (not recursive).
# Returns:   list of "$dir/<name>" for every entry whose name ends in one of
#            the extensions listed in $cppExt.
#
# The directory is simply listed, which requires srcdir == builddir.  Reading
# the _SOURCES / _OBJECTS variables that automake and am_edit put into the
# Makefile would be more accurate, but is left as an exercise to the reader.
sub find_fixable_sources ($)
{
  my $dir = shift @_;

  opendir (my $dh, "$dir") || die "Couldn't read '$dir'\n";
  my @names = ();
  while (defined (my $entry = readdir($dh))) {
    push @names, $entry if ($entry =~ /^.*\.$cppExt$/o);
  }
  closedir($dh);

  if ($verbose) {
    print "found sources: [ " . join(' ', @names) . " ] in $dir\n";
  }

  # hand back the names with the directory put in front
  my @with_dir = map { "$dir/$_" } @names;
  return @with_dir;
}

# Collect the header files that live directly in a directory.
#
# Parameter: $dir - directory to scan (not recursive).
# Returns:   list of "$dir/<name>" for every entry whose name ends in one of
#            the extensions listed in $hExt.
#
# The directory is simply listed, which requires srcdir == builddir.  Reading
# the _HEADERS variables that automake and am_edit put into the Makefile
# would be more accurate, but is left as an exercise to the reader.
sub find_fixable_headers ($)
{
  my $dir = shift @_;

  opendir (my $dh, "$dir") || die "Couldn't read '$dir'\n";
  my @names = ();
  while (defined (my $entry = readdir($dh))) {
    push @names, $entry if ($entry =~ /^.*\.$hExt$/o);
  }
  closedir($dh);

  if ($verbose) {
    print "found headers: [ " . join(' ', @names) . " ] in $dir\n";
  }

  # hand back the names with the directory put in front
  my @with_dir = map { "$dir/$_" } @names;
  return @with_dir;
}

# Scan a source or header file for #include lines that are candidates for
# removal or replacement.
#
# Parameter: path of the file to scan.
# Returns:   list of the included file names in file order; a name that is
#            included several times appears several times.
# Dies if the file cannot be opened.
#
# Reported are
#   #include "anything.h"    and
#   #include <qsomething.h> / <ksomething.h>   (Qt / KDE headers only)
# but only when the line is not inside any #if/#ifdef/#ifndef block.  For
# header files the nesting counter starts at -1 so that the contents of the
# include guard (#ifndef/#define/#endif) count as "not ifdefed".
sub find_removable_includes ($)
{
  my $srcfile = shift @_;
  open(my $src, '<', $srcfile) || die "find_removable_includes: couldn't open '$srcfile'\n";

  my @includes = ();

  # we skip all includes that are somehow ifdefed

  my $cpplevel = 0;
  $cpplevel = -1 if ($srcfile=~m/^.*\.$hExt$/); # plan for header-protection #ifndef/#define/#endif
  while (my $line = <$src>) {
    if ($line =~ m/^\#if/) {
      $cpplevel = $cpplevel + 1;
      next;
    }
    if ($line =~ m/^\#endif/) {
      $cpplevel = $cpplevel - 1;
      next;
    }
    next if ($cpplevel != 0);

    # the dot before "h" is escaped in both patterns, so that only real
    # "*.h" names are accepted
    if (($line =~ m/^\#include\s*\"(\S+\.h)\"\S*/) || ($line =~ m/^\#include\s*\<([qk]\S+\.h)\>\S*/))  {
      push @includes, $1;
    }
  }
  close($src);

  print "No fixable includes found in $srcfile\n" if ($verbose and not @includes);
  print "found includes: [ " . join(' ', @includes) . " ]\n" if ($verbose and @includes);

  return @includes;
}

# Determine which headers of a directory get installed.
#
# Parameter: directory containing a Makefile.am.
# Returns:   the words on the right-hand side of every "<prefix>_HEADERS ="
#            assignment in that Makefile.am, except for noinst_HEADERS.
# Dies if the Makefile.am cannot be opened.
sub find_installed_headers($)
{
  my $sdir = shift @_;

  open(my $am, "<$sdir/Makefile.am") || die "couldn't open $sdir/Makefile.am $!";
  my $text = join('', <$am>);
  close($am);

  # glue backslash-continued lines together
  $text =~ s/\\\s*\n/ /g;

  # collect the headers of every *_HEADERS variable that is installed
  my @installed = ();
  foreach my $line (split /^/, $text) {
    next unless ($line =~ /^(\w+)_HEADERS\s*=(.*)$/);
    my ($prefix, $value) = ($1, $2);
    next if ($prefix eq "noinst");
    push @installed, split (' ', $value);
  }
  return @installed;
}

# Remove #include lines for a given header from a file, rewriting the file
# in place.
#
# first parameter: srcfile
# second parameter: include to remove (plain name, matched literally)
# third parameter is the duplicate level: at most this many occurrences are
# removed, counted from the END of the file.  The last occurrences are the
# ones to drop because for duplicates removing the first one could make a
# difference.
#
# Only includes outside of #if/#ifdef blocks are touched.  For header files
# the nesting counter starts at -1 to account for the include guard.
# Dies if the file is not readable and writable, or cannot be rewritten.
sub remove_include ($$$)
{
  my $srcfile = shift @_;
  my $include = quotemeta(shift @_);
  my $level = shift @_;

  die "$srcfile is not read/writeable!\n" if( ! -r $srcfile || ! -w $srcfile);
  open(my $in, '<', $srcfile) or die "remove_include: couldn't open '$srcfile'\n";
  my @contents = <$in>;
  close($in);

  # Walk the file bottom-up.  Because of that the #if/#endif bookkeeping
  # is reversed: an #endif opens a conditional block, an #if closes it.
  my @kept = ();
  my $cpplevel = 0;
  $cpplevel = -1 if ($srcfile=~m/^.*\.$hExt$/); # plan for header-protection #ifndef/#define/#endif
  foreach my $line (reverse @contents) {
    if ($line =~ m/^\#if/) {
      $cpplevel = $cpplevel - 1;
    }
    elsif ($line =~ m/^\#endif/) {
      $cpplevel = $cpplevel + 1;
    }
    elsif ($level && $cpplevel == 0 &&
           $line =~ m/^\#include\s*(?:\"$include\"|\<$include\>)/) {
      $level = $level - 1;
      # skipping the line..
      next;
    }

    push @kept, $line;
  }

  # @kept holds the surviving lines in reverse order; write them back and
  # make sure a failed write does not go unnoticed
  open(my $out, '>', $srcfile) or die "remove_include: couldn't open '$srcfile' for writing\n";
  print {$out} reverse(@kept) or die "remove_include: couldn't write '$srcfile': $!\n";
  close($out) or die "remove_include: couldn't write '$srcfile': $!\n";
}

# Replace the header named in #include lines of a file, rewriting the file
# in place.
#
# first parameter: srcfile
# second parameter: include to replace (plain name, matched literally)
# third parameter the include file to replace it with; the text is inserted
# between the original delimiters (<> or "")
#
# Every matching include outside of #if/#ifdef blocks is replaced.  For
# header files the nesting counter starts at -1 to account for the include
# guard.  Dies if the file is not readable and writable, or cannot be
# rewritten.
sub replace_include ($$$)
{
  my $srcfile = shift @_;
  my $include = shift @_;
  my $destinclude = shift @_;

  # regex-safe form of the name; the raw name is kept for messages
  my $pattern = quotemeta($include);

  die "$srcfile is not read/writeable!\n" if( ! -r $srcfile || ! -w $srcfile);
  open(my $in, '<', $srcfile) or die "replace_include: couldn't open '$srcfile'\n";
  my @contents = <$in>;
  close($in);

  # Walk the file bottom-up.  Because of that the #if/#endif bookkeeping
  # is reversed: an #endif opens a conditional block, an #if closes it.
  my @fixed = ();
  my $cpplevel = 0;
  $cpplevel = -1 if ($srcfile=~m/^.*\.$hExt$/); # plan for header-protection #ifndef/#define/#endif
  foreach my $line (reverse @contents) {
    if ($line =~ m/^\#if/) {
      $cpplevel = $cpplevel - 1;
    }
    elsif ($line =~ m/^\#endif/) {
      $cpplevel = $cpplevel + 1;
    }
    elsif ($cpplevel == 0 &&
           $line =~ m/^\#include\s*(?:\"$pattern\"|\<$pattern\>)/) {
      print "HAH! found $include to replace in $srcfile!\n" if($verbose);
      $line =~ s/(\#include\s*[\"<])$pattern([\">]\S*)/$1$destinclude$2/;
    }

    push @fixed, $line;
  }

  # @fixed holds the lines in reverse order; write them back and make sure
  # a failed write does not go unnoticed
  open(my $out, '>', $srcfile) or die "replace_include: couldn't open '$srcfile' for writing\n";
  print {$out} reverse(@fixed) or die "replace_include: couldn't write '$srcfile': $!\n";
  close($out) or die "replace_include: couldn't write '$srcfile': $!\n";
}

# fixes #include <foo.h> -> #include "foo.h"
#
# first parameter: srcfile, rewritten in place
# second parameter: name of the include whose bracket form is to be turned
# into the quoted form; the name is matched literally
#
# Every line starting with "#include <name>" is changed, regardless of any
# surrounding #if blocks; anything following the closing bracket is kept.
# Dies if the file is not readable and writable, or cannot be rewritten.
sub replace_include_type ($$)
{
  my ($srcfile, $include) = @_;

  die "$srcfile is not read/writeable!\n" if( ! -r $srcfile || ! -w $srcfile);
  open(my $in, '<', $srcfile) or die "replace_include_type: couldn't open '$srcfile'\n";
  my @contents = <$in>;
  close($in);

  # \Q..\E: the header name is literal text, not a pattern
  foreach my $line (@contents) {
    $line =~ s/^(\#include)\s*<\Q$include\E>(.*)$/$1 "$include"$2/;
  }

  # now we have the fixed contents in @contents
  open(my $out, '>', $srcfile) or die "replace_include_type: couldn't open '$srcfile' for writing\n";
  print {$out} @contents or die "replace_include_type: couldn't write '$srcfile': $!\n";
  close($out) or die "replace_include_type: couldn't write '$srcfile': $!\n";
}

# Report headers that a file includes more than once (outside of #if
# blocks) and, when $modify is set, remove the surplus occurrences so that
# exactly one include of each header remains.
#
# Parameter: path of the file to check.
sub fix_duplicates($)
{
  my $srcfile = shift @_;

  # how often is each header included?
  my %times_seen = ();
  foreach my $name (find_removable_includes($srcfile)) {
    $times_seen{$name} = 0 unless (exists $times_seen{$name});
    $times_seen{$name} = $times_seen{$name} + 1;
  }

  # everything seen more than once is a duplicate
  foreach my $name (keys %times_seen) {
    my $count = $times_seen{$name};
    next unless ($count > 1);

    print "$srcfile: duplicate level ". $count .": ". $name ."\n";

    if ($modify) {
      remove_include($srcfile, $name, $count - 1);
    }
  }
}

# Find the file that caused the first real error in compiler output.
#
# Parameter: captured output of a make/compiler run.
# Returns:   base name of the file in the first "filename:lineno:message"
#            line whose message is neither a warning nor blank; the string
#            "BUG!" if there is no such line.
sub extract_gcc_error($)
{
  my $out = shift;

  foreach my $text (split /\n/, $out) {
    # only lines of the form filename:lineno:message are of interest
    next unless ($text =~ m/^(.*?):([0-9]+):(.*)$/);

    my $where = $1 || "";
    my $what  = $3 || "";

    # warnings and empty messages did not break the build
    next if ($what =~ m/\s+warning:.*/ || $what =~ m/^\s*$/);

    return basename($where);
  }
  return "BUG!";
}

# Report includes of compatibility headers (the keys of %compatmap) found in
# a file outside of #if blocks and, when $modify is set, replace them with
# the header(s) that %compatmap names.
#
# Parameter: path of the file to check.
sub fix_compat_includes($)
{
  my $srcfile = shift @_;

  # reduce the include list to the set of distinct names
  my %present = ();
  foreach my $name (find_removable_includes($srcfile)) {
    $present{$name} = 1;
  }

  # look every distinct include up in the compat table
  foreach my $old (keys %present) {
    next unless (defined $compatmap{$old});

    print "$srcfile: compat header: $old, to be replaced by ". $compatmap{$old} ."\n";
    if ($modify) {
      replace_include($srcfile, $old, $compatmap{$old});
    }
  }
}

# Report includes that use the bracket form (#include <foo.h>) although the
# header lives next to the including file, and convert them to the quoted
# form when $modify is set.
#
# Parameter: path of the file to check.
#
# A bracket include is left alone when
#   - no readable file of that name exists in the directory of $srcfile,
#   - the header is listed in @instheaders (installed headers are included
#     with brackets by their users), or
#   - it is config.h.
# Dies if $srcfile cannot be opened.
sub fix_include_type($)
{
  my $srcfile = shift @_;
  my $srcdir = dirname($srcfile);

  open(my $in, '<', $srcfile) || die "couldn't open $srcfile in _fix_include_type";
  my @bracketincs = ();
  while (my $line = <$in>) {
    # whole-line match: only the include itself, optionally indented
    push @bracketincs, $1 if ($line =~ m/^\s*\#include\s*<([^>]+)>\s*$/);
  }
  close($in);

  foreach my $include (@bracketincs) {
    next if (!(-r "$srcdir/$include"));
    # compare as plain strings: header names are not regular expressions
    next if (grep { $_ eq $include } @instheaders);
    next if ($include eq "config.h"); # oh don't get me started on that

    print "$srcfile: #include <$include> should use #include \"...\"\n";
    replace_include_type($srcfile, $include) if($modify);
  }
}

# Copies the contents of one file into another.
#
# first parameter: file to read
# second parameter: file to write; created if missing, overwritten if it
# already exists
# Dies if either file cannot be opened.
sub copy_file($$)
{
  my ($src, $dst) = @_;

  # read everything first ...
  open(my $in, "< $src") or die "copy_file: can't open $src for input\n";
  my $data = join('', <$in>);
  close($in);

  # ... then write it out in one go
  open(my $out, "> $dst") or die "copy_file: can't open $dst for output\n";
  print {$out} $data;
  close($out);
}

# interrupt handler for fix_unnecessary
#
# Installed as $SIG{INT} while fix_unnecessary has a modified copy of the
# source file in place.  It puts the backup back and terminates the script
# with exit code 1.  $srcfile, $srcdir and $localbackup are the dynamically
# scoped ("local") variables of the running fix_unnecessary call.
#
# Parameter (passed by perl): name of the signal.
sub sighandler_fix_unnecessary()
{
  my($sig) = @_;
  print "Caught a SIG$sig--shutting down after restoring $srcfile\n";
  if (chdir($srcdir)) {
    # "or" instead of "||": the check has to apply to the result of the
    # call, not to its last argument
    unlink($srcfile) or warn "couldn't unlink $srcfile: $!";
    rename($localbackup, $srcfile) or warn "couldn't rename $localbackup to $srcfile: $!";
  }
  else {
    # $srcfile is relative to $srcdir; don't touch files somewhere else
    warn "couldn't chdir to $srcdir, $srcfile not restored from $localbackup: $!";
  }
  exit(1);
}

# Experimental mode: find includes that are not needed by trial and error.
#
# Parameter: path of the source or header file to work on.
#
# For every removable include of the file (see find_removable_includes),
# one occurrence is removed and the file is rebuilt: sources by making their
# .lo or .o object, headers by running "make all".  If the build still
# succeeds the include stays removed, otherwise the original file is put
# back from a backup copy.  $exp_success / $exp_failure count the outcomes.
#
# If the environment variable OBJ_REPLACEMENT is set, it is applied to the
# source directory with sed to obtain the build directory and "makeobj" is
# used instead of "make".
#
# $srcfile, $srcdir and $localbackup are "local" on purpose: the SIGINT
# handler sighandler_fix_unnecessary reads them to restore the file.
#
# Exits the script with status 1 if the file cannot be built at all; dies
# in verbose mode if the unmodified copy stops building.
sub fix_unnecessary($)
{
  local $srcfile = shift @_;
  local $srcdir = dirname($srcfile);

  # output of the most recent make run; declared once here so that every
  # diagnostic below reports the run it actually refers to
  my $output = "";

  # find canonical path for srcdir
  my $origdir = cwd;
  chdir($srcdir) or die "fix_unnecessary: couldn't chdir to '$srcdir': $!\n";
  $srcdir = cwd;
  print "srcdir=$srcdir\n" if($verbose);

  my $builddir = $srcdir;
  my $makecmd = "make";
  if (defined $ENV{"OBJ_REPLACEMENT"})
  {
    # OBJ_REPLACEMENT holds a sed expression; the shell expands the
    # variable and sed applies it to the source directory
    $builddir = `echo $srcdir | sed -e "\$OBJ_REPLACEMENT"`;
    chomp $builddir;
    $makecmd = "makeobj";
  }
  print "builddir=$builddir\n" if($verbose);

  my $tot = $exp_success + $exp_failure;
  print "=============== $srcfile (successes: $exp_success; total: $tot)\n";

  # from here on the file is addressed relative to $srcdir
  $srcfile = basename($srcfile);

  # first figure out some details
  my @includes = &find_removable_includes($srcfile);

  # file name without its extension
  my $blanksrc = $srcfile;
  $blanksrc =~ s/(.*)\.[^\.]+/$1/;

  # Determine the make target that rebuilds this file: "all" for headers,
  # "<name>.lo" or "<name>.o" for sources.  "BUG" means nothing worked.
  print "Checking for initial compilation: ";
  chdir($builddir);
  my $objextension = "BUG";
  if($srcfile =~ /\.$hExt$/o) {
    $output = `$makecmd all 2>&1`;
    $objextension = "all" if ( 0 == ($? >> 8));
  }
  else {
    unlink "$blanksrc.lo";
    $output = `$makecmd $blanksrc.lo 2>&1`;
    $objextension = ".lo" if ( 0 == ($? >> 8));
    if($objextension eq "BUG") {
      print "failed with .lo... ";
      unlink "$blanksrc.o";
      $output = `$makecmd $blanksrc.o 2>&1`;
      $objextension = ".o" if ( 0 == ($? >> 8));
    }
    # NOTE(review): this pattern is not anchored, so it matches any source
    # file whose name contains one of the header extensions (e.g. a plain
    # "h") -- the intent is unclear, confirm before changing
    if($srcfile =~ /$hExt/) {
      $output = `$makecmd $blanksrc.o 2>&1`;
      $objextension = ".o" if ( 0 == ($? >> 8));
    }
  }
  if($objextension eq "BUG") {
    warn "can't figure out right compile command for $srcfile :-(\n" .
         "??? unused, or didn't compile in the first place?\n" .
         "$output";
    chdir($origdir);
    exit 1;
  }

  print "worked with $objextension\n";

  # now try to drop some includes
  foreach my $include (@includes) {
    # kdatastream is special because
    # it will break the application if removed even
    # if it continues to compile
    next if( $include eq "kdatastream.h");
    # I also like to have kdebug.h still in
    # so that it's easy to add kdDebug calls
    next if( $include eq "kdebug.h");
    # avoid this one as it might cause
    # certain code parts to be disabled from compilation
    next if( $include eq "qmodules.h");
    # don't remove this one either. causes conditional
    # code to be compiled incorrectly
    next if( $include eq "tdeversion.h");
    # don't remove the config.h include
    # conditional code may depend on this file
    next if( $include eq "config.h");
    # check if it is its own header file
    my $blankhdr = $include;
    $blankhdr =~ s/(.*)\.[^\.]+/$1/;
    next if ($blankhdr eq $blanksrc);

    chdir($srcdir);

    local $localbackup = $srcfile . "#fixkdeincludes";

    # preserve timestamp if possible for CVS: the original file becomes the
    # backup (and is renamed back on failure), the copy gets modified
    unlink $localbackup;
    rename $srcfile, $localbackup;
    copy_file($localbackup, $srcfile);

    # revert to backup in case of interrupt (Ctrl+C)
    $SIG{'INT'} = \&sighandler_fix_unnecessary;

    # check if it still compiles (sanity check of the unmodified copy,
    # only done in verbose mode)
    if($verbose) {
      chdir($builddir);
      # testing headers? need to compile everything
      if($objextension eq "all") {
        # wait a second for makefile timestamp comparisons
        sleep 1;
        $output = `$makecmd all 2>&1`;
      }
      else {
        unlink "$blanksrc$objextension";
        $output = `$makecmd $blanksrc$objextension 2>&1`;
      }
      die "unexpected error $output\nexitcode=" . ($? >> 8) if($? >> 8);
      chdir($srcdir);
    }

    # duplicates have to be nuked here , so it will be dropped maximum once
    print "trying without $include: ";
    &remove_include($srcfile, $include, 1);

    chdir($builddir);

    # try if it compiles
    if($objextension eq "all") {
      sleep 1;
      $output=`$makecmd $objextension 2>&1`;
    }
    else {
      unlink "$builddir/$blanksrc$objextension";
      $output=`$makecmd $blanksrc$objextension 2>&1`;
    }
    my $retcode = ($? >> 8);
    #print "retcode=$retcode\n$output" if ($verbose);

    chdir($srcdir);
    if($retcode == 0) {
      # wow, it worked, lets continue!
      print "SUCCESS!\n";
      $SIG{'INT'} = 'DEFAULT';
      unlink $localbackup;
      $exp_success = $exp_success + 1;
    }
    else {
      # is this a fixable error?
      my $culprit = &extract_gcc_error($output);
      if($objextension eq "all" and $culprit ne $srcfile) {
        # removing the include from a header broke some other file
        print "failed (error in " . $culprit . ")\n";
        # FIXME: implement fixup of the compilation error
        # so that we can be much more agressive in removing
        # unneeded includes from headers
      }
      else
      {
        # better luck next time
        print "FATALLY failed\n";
      }
      # put the original file back
      unlink $srcfile;
      rename $localbackup, $srcfile;
      $SIG{'INT'} = 'DEFAULT';

      $exp_failure = $exp_failure + 1;
    }
  }

  print "\n";

  chdir($origdir);
}

# Run all checks (and, depending on the global flags, fixes) on one file:
# include type, compat headers, duplicate includes and - in experimental
# mode - trial removal of includes.
#
# Parameter: path of the source or header file.
#
# Installed headers (listed in @instheaders) are excluded from the
# experimental removal.
sub process_source_file($)
{
  my $file = shift @_;
  my $pure = basename($file);
  print "Checking: $file\n" if($verbose);
  fix_include_type($file);
  fix_compat_includes($file);
  fix_duplicates($file);
  if ($experimental) {
    # plain string comparison: a file name is not a regular expression
    # (a name such as "foo.c++" is not even a valid one)
    my $is_installed = grep { $_ eq $pure } @instheaders;
    fix_unnecessary($file) if (!$is_installed);
  }
  print "\n" if ($verbose);
}

# Decide whether a directory is an automake source directory that should be
# processed.
#
# Parameter: directory to examine.
# Returns:   1 if the directory has a readable Makefile.am and either no
#            readable Makefile or a Makefile whose first "srcdir = ..." line
#            says "." (srcdir == builddir);
#            0 otherwise, i.e. without Makefile.am, for a separate build
#            directory, or for a Makefile without srcdir line (not generated
#            by automake).
# Dies if an existing Makefile cannot be opened.
sub check_for_automake_srcdir($)
{
  my $dir = shift;

  return 0 unless ( -r "$dir/Makefile.am");
  return 1 unless ( -r "$dir/Makefile");

  # there is a Makefile: look up what it says about srcdir
  open(my $mk, "<$dir/Makefile") || die "couldn't read $dir/Makefile";
  my $srcdir_value;
  while (my $line = <$mk>) {
    if ($line =~ /^srcdir\s*=\s*(\S+)/) {
      $srcdir_value = $1;
      last;
    }
  }
  close($mk);

  # no srcdir at all: this makefile isn't generated by automake
  return 0 unless (defined $srcdir_value);

  return 1 if ($srcdir_value eq ".");

  # srcdir points elsewhere, so this is a build directory
  print "Skipping build dir: $dir\n" if($verbose);
  return 0;
}

#############################################################################
# here is the main logic
#

# Before anything gets modified: warn if the working copy already has
# pending changes.  grep exits with 0 when cvscheck reported at least one
# line starting with M, m or C.
if($modify) {
  my $pending = `cvscheck | grep '^[MmC]'`;
  my $grep_status = $? >> 8;
  if ($grep_status == 0) {
    print "WARNING: you have pending local changes. You might commit them by accident!\n\n";
  }
}

# the experimental mode deserves a warning of its own
if($experimental) {
  print "WARNING: The experimental mode is indeed experimental.\n" .
        "It tries to reduce includes by testing if it would compile\n" .
        "without a particular include. It might introduce subtle bugs\n" .
        "or break compilation for make check or make final.\n\n" .
        "This operation mode is known to be unsafe. You've been warned.\n";
}

# process files from the command line, if any; the directory scan below is
# skipped in that case
if ( @explicitfiles ) {
  foreach my $file ( @explicitfiles ) {
    # The checks consult @instheaders to leave installed headers alone, so
    # it has to describe the directory of the file at hand, just as in the
    # directory scan.  Without a Makefile.am nothing is known to be
    # installed.
    my $filedir = dirname($file);
    @instheaders = ( -r "$filedir/Makefile.am" ) ? find_installed_headers($filedir) : ();

    process_source_file( $file );
  }
  exit 0;
}

# Build @dirlist, the list of directories to process: the current directory
# plus every subdirectory (at any depth) that check_for_automake_srcdir
# accepts.  The script refuses to run outside of a source directory.
@dirlist = ();
push @dirlist, "." if (&check_for_automake_srcdir("."));
die "current directory isn't srcdir!" if (!scalar @dirlist);
# NOTE(review): @dirlist is appended to while it is being iterated; the
# traversal of nested directories presumably relies on foreach also visiting
# the entries pushed inside the loop -- confirm before restructuring.
foreach $dir ( @dirlist ) {
 opendir (DIR, "$dir") || warn "Couldn't read '$dir'";
 my $subdir = "";
 while( $subdir = readdir(DIR)) {
   # skip ".", ".." and hidden entries
   next if ($subdir =~ /^\./);
   # only directories are of interest
   next if !( -d "$dir/$subdir");
   # ... and of those only automake source directories
   next if (! &check_for_automake_srcdir("$dir/$subdir"));

   push @dirlist, "$dir/$subdir";
 }
 closedir(DIR);
}

# Work through every collected directory: first its headers, then its
# sources.
foreach $dir (@dirlist) {

  # A directory opts out of being fixed by undefining QT_NO_COMPAT or
  # KDE_NO_COMPAT in its Makefile.am.
  my $opted_out = 0;
  if (open(my $am, "$dir/Makefile.am")) {
    $opted_out = scalar(grep { /(\-UQT_NO_COMPAT|\-UKDE_NO_COMPAT)/ } <$am>);
    close($am);
  }
  if ($opted_out) {
    print "Skipping directory: $dir\n";
    next;
  }

  # @instheaders is global on purpose: the checks called by
  # process_source_file consult it
  @headers = find_fixable_headers($dir);
  @instheaders = find_installed_headers($dir);
  foreach my $header (@headers) {
    process_source_file($header);
  }

  @sources = find_fixable_sources($dir);
  foreach my $source (@sources) {
    process_source_file($source);
  }
}