/***************************************************************************
                          ksaneocr.cpp  -  generic ocr
                             -------------------
    begin                : Fri Jun 30 2000
    copyright            : (C) 2000 by Klaas Freitag
    email                : freitag@suse.de
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *  This file may be distributed and/or modified under the terms of the    *
 *  GNU General Public License version 2 as published by the Free Software *
 *  Foundation and appearing in the file COPYING included in the           *
 *  packaging of this file.                                                *
 *
 *  As a special exception, permission is given to link this program       *
 *  with any version of the KADMOS ocr/icr engine of reRecognition GmbH,   *
 *  Kreuzlingen and distribute the resulting executable without            *
 *  including the source code for KADMOS in the source distribution.       *
 *
 *  As a special exception, permission is given to link this program       *
 *  with any edition of TQt, and distribute the resulting executable,       *
 *  without including the source code for TQt in the source distribution.   *
 *                                                                         *
 ***************************************************************************/

/* $Id$ */

#include <kdebug.h>
#include <kmessagebox.h>
#include <kconfig.h>
#include <kapplication.h>
#include <ktempfile.h>
#include <kprocess.h>
#include <stdlib.h>
#include <kspell.h>
#include <kspelldlg.h>
#include <tqfile.h>
#include <tqcolor.h>
#include <stdio.h>
#include <unistd.h>

#include <img_canvas.h>

#include "img_saver.h"
#include "kadmosocr.h"
#include "kocrbase.h"
#include "kocrkadmos.h"
#include "kocrocrad.h"
#include "config.h"
#include "ksaneocr.h"
#include "kocrgocr.h"
#include "kookaimage.h"
#include "kookapref.h"
#include "ocrword.h"

#include <tqtimer.h>
#include <tqregexp.h>
#include <klocale.h>
#include <tqpaintdevice.h>
#include <tqpainter.h>
#include <tqpen.h>
#include <tqbrush.h>
#include <tqfileinfo.h>

/*
 * Thread support is disabled here because the KADMOS library unfortunately
 * does not seem to be thread-safe. See the comments on slotKadmosResult for
 * more information.
 */

/*
 * Sets up the OCR controller in its idle state and reads the preferred OCR
 * engine and the orf cleanup flag from the global configuration.
 * The widget argument is unused; cfg may be null and only supplies the
 * HIDE_BASE_DIALOG setting.
 */
KSANEOCR::KSANEOCR( TQWidget*, KConfig *cfg ):
    m_ocrProcessDia(0L),
    daemon(0L),
    visibleOCRRunning(false),
    m_resultImage(0),
    m_imgCanvas(0L),
    m_spell(0L),
    m_wantKSpell(true),
    m_kspellVisible(true),
    m_hideDiaWhileSpellcheck(true),
    m_spellInitialConfig(0L),
    m_parent(0L),
    m_ocrCurrLine(0),
    m_currHighlight(-1),
    m_applyFilter(false),
    m_unlinkORF(true)
{
    KConfig *globalConf = KGlobal::config();

    /* ocrad is the default engine unless the configuration says otherwise */
    m_ocrEngine = OCRAD;
    m_img       = 0L;
    m_tmpFile   = 0L;

    if( cfg )
    {
        m_hideDiaWhileSpellcheck = cfg->readBoolEntry( HIDE_BASE_DIALOG, true );
    }

    /*
     * The initial spell config is the starting point for the config dialog,
     * and is also needed for ocr runs without a visible dialog.
     */
    m_spellInitialConfig = new KSpellConfig( 0L, 0L, 0L, false );

    if( globalConf )
    {
        /* -- ocr dialog information -- */
        globalConf->setGroup( CFG_GROUP_OCR_DIA );
        const TQString engineName = globalConf->readEntry( CFG_OCR_ENGINE, "ocrad" );

        /* "ocrad" and every unknown name keep the default set above */
        if( engineName == "gocr" )
        {
            m_ocrEngine = GOCR;
        }
#ifdef HAVE_KADMOS
        else if( engineName == TQString("kadmos") )
        {
            m_ocrEngine = KADMOS;
        }
#endif
        kdDebug(28000) << "OCR engine is " << engineName << endl;

        m_unlinkORF = globalConf->readBoolEntry( CFG_OCR_CLEANUP, true );
    }

    /* there is always at least one block, so start m_blocks with size 1 */
    m_blocks.resize(1);
}


/*
 * Releases the ocr process, the temporary file, the images and the initial
 * spell configuration owned by this object.
 */
KSANEOCR::~KSANEOCR()
{
    /* deleting a null pointer is a no-op, so most members need no guard */
    delete daemon;
    daemon = 0;

    if( m_tmpFile )
    {
        /* make sure the file on disk goes away together with the object */
        m_tmpFile->setAutoDelete( true );
        delete m_tmpFile;
    }

    delete m_resultImage;
    m_resultImage = 0;

    delete m_img;
    delete m_spellInitialConfig;
}

/*
 * This slot is called to introduce a new image, usually if the user clicks on a
 * new image either in the gallery or on the thumbnailview.
 */
void KSANEOCR::slSetImage(KookaImage *img )
{
   /*
    * Takes a private copy of img as the image to run ocr on and hands it to
    * the ocr dialog, if one is open. A null img is ignored.
    */
   if( ! img ) return;

   // FIXME: copying the whole image is expensive.
   // The copy is made BEFORE the old image is released: this keeps the call
   // safe if img happens to be m_img itself and leaves m_img valid if the
   // copy fails.
   KookaImage *copy = new KookaImage( *img );
   delete m_img;
   m_img = copy;

   if( m_ocrProcessDia )
   {
       m_ocrProcessDia->introduceImage( m_img );
   }

   m_applyFilter = false;
}

/*
 * Request to visualise a line-box in the source image, KADMOS Engine
 */
void KSANEOCR::slLineBox( const TQRect& )
{
    /* Nothing is visualised at the moment; the rectangle is ignored. */
    if( m_img == 0 )
    {
        return;
    }
}


/*
 * starts visual ocr process. Depending on the ocr engine, this function creates
 * a new dialog, and shows it.
 */
bool KSANEOCR::startOCRVisible( TQWidget *parent )
{
    /* only one visible ocr session may be active at a time */
    if( visibleOCRRunning )
    {
        return false;
    }

    m_parent = parent;

    /* engine specific part: create the dialog that belongs to the engine */
    if( m_ocrEngine == GOCR )
    {
        m_ocrProcessDia = new KGOCRDialog ( parent, m_spellInitialConfig );
    }
    else if( m_ocrEngine == OCRAD )
    {
        m_ocrProcessDia = new ocradDialog( parent, m_spellInitialConfig );
    }
    else if( m_ocrEngine == KADMOS )
    {
#ifdef HAVE_KADMOS
        /*** Kadmos Engine OCR ***/
        m_ocrProcessDia = new KadmosDialog( parent, m_spellInitialConfig );
#else
        KMessageBox::sorry(0, i18n("This version of Kooka was not compiled with KADMOS support.\n"
            "Please select another OCR engine in Kooka's options dialog."));
        kdDebug(28000) << "Sorry, this version of Kooka has no KADMOS support" << endl;
#endif /* HAVE_KADMOS */
    }
    else
    {
        kdDebug(28000) << "ERR Unknown OCR engine requested!" << endl;
    }

    /* from here on everything is independent of the engine again */
    if( m_ocrProcessDia )
    {
        m_ocrProcessDia->setupGui();
        m_ocrProcessDia->introduceImage( m_img );
        visibleOCRRunning = true;

        /* user1 starts the ocr, user2 stops it, close leaves the dialog */
        connect( m_ocrProcessDia, TQT_SIGNAL( user1Clicked()), this, TQT_SLOT( startOCRProcess() ));
        connect( m_ocrProcessDia, TQT_SIGNAL( closeClicked()), this, TQT_SLOT( slotClose() ));
        connect( m_ocrProcessDia, TQT_SIGNAL( user2Clicked()), this, TQT_SLOT( slotStopOCR() ));

        m_ocrProcessDia->show();
    }

    /* NOTE: the result is true whenever no session was running on entry,
     * regardless of whether a dialog could be created above. */
    return true;
}

/**
 * This method should be called by the engine specific finish slots.
 * It does the not engine dependant cleanups like re-enabling buttons etc.
 */

void KSANEOCR::finishedOCRVisible( bool success )
{
   bool doSpellcheck = m_wantKSpell;

   if( m_ocrProcessDia )
   {
       m_ocrProcessDia->stopOCR();
       doSpellcheck = m_ocrProcessDia->wantSpellCheck();
   }

   if( success )
   {
       TQString goof = ocrResultText();

       emit newOCRResultText(goof);

       if( m_imgCanvas )
       {
	   if( m_resultImage != 0 ) delete m_resultImage;
	   kdDebug(28000) << "Result image name: " << m_ocrResultImage << endl;
	   m_resultImage = new TQImage( m_ocrResultImage, "BMP" );
	   kdDebug(28000) << "New result image has dimensions: " << m_resultImage->width() << "x" << m_resultImage->height()<< endl;
           /* The image canvas is non-zero. Set it to our image */
           m_imgCanvas->newImageHoldZoom( m_resultImage );
	   m_imgCanvas->setReadOnly(true);

           /* now handle double clicks to jump to the word */
           m_applyFilter=true;
       }

       /** now it is time to invoke the dictionary if required **/
       emit readOnlyEditor( false );

       if( doSpellcheck )
       {
           m_ocrCurrLine = 0;
           /*
            * create a new kspell object, based on the config of the base dialog
            */

           connect( new KSpell( m_parent, i18n("Kooka OCR Dictionary Check"),
                                this, TQT_SLOT( slSpellReady(KSpell*)),
                                m_ocrProcessDia->spellConfig() ),
                    TQT_SIGNAL( death()), this, TQT_SLOT(slSpellDead()));
       }

       delete m_ocrProcessDia;
       m_ocrProcessDia = 0L;

   }

   visibleOCRRunning =  false;
   cleanUpFiles();


   kdDebug(28000) << "# ocr finished #" << endl;
}

/*
 * starting the spell check on line m_ocrCurrLine if the line exists.
 * If not, the function returns.
 */
void KSANEOCR::startLineSpellCheck()
{
    /* past the last line: tell kspell that we are done and leave */
    if( m_ocrCurrLine >= m_ocrPage.size() )
    {
        kdDebug(28000) << k_funcinfo <<" -- no more lines !" << endl;
        m_spell->cleanUp();
        return;
    }

    m_checkStrings = (m_ocrPage[m_ocrCurrLine]).stringList();

    /* an empty word list is reported to the result slot immediately */
    if( m_checkStrings.isEmpty() )
    {
        slCheckListDone(false);
        return;
    }

    kdDebug(28000)<< "Wordlist (size " << m_ocrPage[m_ocrCurrLine].count() << ", line " << m_ocrCurrLine << "):" << m_checkStrings.join(", ") << endl;

    /**
     * The following call ends in three slots:
     * 1. slMisspelling:    Hit _before_ the dialog (if any) appears. Time to
     *    mark the wrong word.
     * 2. slSpellCorrected: Hit if the user decided which word to use.
     * 3. slCheckListDone:  The line is finished. The global counter needs to be
     *    increased and this function needs to be called again.
     **/
    m_spell->checkList( &m_checkStrings, m_kspellVisible );
    kdDebug(28000)<< "Started!" << endl;
}



/* slotClose is called when the user leaves the dialog with the close button,
 * e.g. without ever starting the ocr. A still running ocr process is killed. */
void KSANEOCR::slotClose()
{
    kdDebug(28000) << "closing ocr Dialog" << endl;

    /* a process that is still working must not outlive the dialog */
    const bool stillRunning = daemon && daemon->isRunning();
    if( stillRunning )
    {
        kdDebug(28000) << "Still running - Killing daemon with Sig. 9" << endl;
        daemon->kill(9);
    }

    finishedOCRVisible( false );
}

/*
 * Stops a running ocr process on user request. The dialog stays open; if
 * no process is running, nothing happens.
 */
void KSANEOCR::slotStopOCR()
{
    /* this slot stops the process, it does not close the dialog */
    kdDebug(28000) << "stopping ocr process" << endl;
    if( daemon && daemon->isRunning() )
    {
        kdDebug(28000) << "Killing daemon with Sig. 9" << endl;
        daemon->kill(9);
        // that leads to the process being destroyed.
        KMessageBox::error(0, i18n("The OCR-process was stopped.") );
    }
}

void KSANEOCR::startOCRAD( )
{
    ocradDialog *ocrDia = static_cast<ocradDialog*>(m_ocrProcessDia);

    m_ocrResultImage = ocrDia->orfUrl();
    const TQString cmd = ocrDia->getOCRCmd();

    // if( m_ocrResultImage.isEmpty() )
    {
	/* The url is empty. Start the program to fill up a temp file */
	m_ocrResultImage = ImgSaver::tempSaveImage( m_img, "BMP", 8 ); // m_tmpFile->name();
	kdDebug(28000) << "The new image name is <" << m_ocrResultImage << ">" << endl;
    }

    m_ocrImagePBM = ImgSaver::tempSaveImage( m_img, "PBM", 1 );

    /* temporar file for orf result */
    KTempFile *tmpOrf = new KTempFile( TQString(), ".orf" );
    tmpOrf->setAutoDelete( false );
    tmpOrf->close();
    m_tmpOrfName = TQFile::encodeName(tmpOrf->name());


    if( daemon )
    {
	delete( daemon );
	daemon = 0;
    }

    daemon = new KProcess;
    TQ_CHECK_PTR(daemon);

    *daemon << cmd;
    *daemon << TQString("-x");
    *daemon <<  m_tmpOrfName;                   // the orf result file
    *daemon << TQFile::encodeName( m_ocrImagePBM ).data();      // The name of the image
    *daemon << TQString("-l");
    *daemon << TQString::number( ocrDia->layoutDetectionMode());

    KConfig *konf = KGlobal::config ();
    KConfigGroupSaver( konf, CFG_GROUP_OCRAD );

    TQString format = konf->readEntry( CFG_OCRAD_FORMAT, "utf8");
    *daemon << TQString("-F");
    *daemon << format;

    TQString charset = konf->readEntry( CFG_OCRAD_CHARSET, "iso-8859-15");
    *daemon << TQString("-c");
    *daemon << charset;


    TQString addArgs = konf->readEntry( CFG_OCRAD_EXTRA_ARGUMENTS, TQString() );

    if( !addArgs.isEmpty() )
    {
	kdDebug(28000) << "Setting additional args from config for ocrad: " << addArgs << endl;
	*daemon << addArgs;
    }

    m_ocrResultText = "";

    connect(daemon, TQT_SIGNAL(processExited(KProcess *)),
	    this,   TQT_SLOT(  ocradExited(KProcess*)));
    connect(daemon, TQT_SIGNAL(receivedStdout(KProcess *, char*, int)),
	    this,   TQT_SLOT(  ocradStdIn(KProcess*, char*, int)));
    connect(daemon, TQT_SIGNAL(receivedStderr(KProcess *, char*, int)),
	    this,   TQT_SLOT(  ocradStdErr(KProcess*, char*, int)));

    if (!daemon->start(KProcess::NotifyOnExit, KProcess::All))
    {
	kdDebug(28000) <<  "Error starting ocrad-daemon!" << endl;
    }
    else
    {
	kdDebug(28000) << "Start OK" << endl;

    }
    delete tmpOrf;
}


/*
 * Called when the ocrad process has exited: parses the orf result file and
 * finishes the ocr run. A parse failure is shown to the user.
 */
void KSANEOCR::ocradExited(KProcess* )
{
    kdDebug(28000) << "ocrad exit " << endl;

    TQString parseError;
    const bool parsedOk = readORF( m_tmpOrfName, parseError );

    if( ! parsedOk )
    {
        KMessageBox::error( m_parent,
                            i18n("Parsing of the OCR Result File failed:") + parseError,
                            i18n("Parse Problem"));
    }

    finishedOCRVisible( parsedOk );
}

/*
 * Receives a chunk of ocrad's stderr output and writes it to the debug log.
 */
void KSANEOCR::ocradStdErr(KProcess*, char* buffer, int buflen)
{
    const TQString chunk( TQString::fromLocal8Bit( buffer, buflen ) );

    kdDebug(28000) << "ocrad says on stderr: " << chunk << endl;
}

/*
 * Receives a chunk of the output ocrad wrote to its stdout (this slot is
 * connected to receivedStdout) and writes it to the debug log.
 */
void KSANEOCR::ocradStdIn(KProcess*, char* buffer, int buflen)
{
    const TQString chunk( TQString::fromLocal8Bit( buffer, buflen ) );

    kdDebug(28000) << "ocrad says on stdin: " << chunk << endl;
}




/*
 * This slot is fired if the user clicks on the 'Start' button of the GUI, no
 * difference which engine is active.
 */
void KSANEOCR::startOCRProcess( void )
{
   /*
    * Switches the dialog to its "running" state and then starts the engine
    * selected in m_ocrEngine: ocrad via startOCRAD(), gocr as an external
    * process configured right here, KADMOS (only if compiled in) through
    * the m_rep object.
    */

   /* nothing to do without a dialog */
   if( ! m_ocrProcessDia ) return;

   /* starting the animation, setting fields disabled */
   m_ocrProcessDia->startOCR();

   /* let pending events be processed before the engine is started */
   kapp->processEvents();
   if( m_ocrEngine == OCRAD )
   {
       startOCRAD();
   }

   if( m_ocrEngine == GOCR )
   {
       /*
        * Starting a gocr process
        */

       KGOCRDialog *gocrDia = static_cast<KGOCRDialog*>(m_ocrProcessDia);

       const TQString cmd = gocrDia->getOCRCmd();

       /* Save the image to a temp file */

       /**
        * Save images formats:
        * Black&White: PBM
        * Gray:        PGM
        * Colour:      PPM
        */
       /* NOTE(review): m_img is dereferenced without a null check here -
        * presumably an image was always set via slSetImage before; confirm. */
       TQString format;
       if( m_img->depth() == 1 )
           format = "PBM";
       else if( m_img->isGrayscale() )
           format = "PGM";
       else
           format = "PPM";

       TQString tmpFile = ImgSaver::tempSaveImage( m_img, format ); // m_tmpFile->name();

       kdDebug(28000) <<  "Starting GOCR-Command: " << cmd << " on file " << tmpFile
		      << ", format " << format << endl;

       /* get rid of the process object of a previous run */
       if( daemon ) {
           delete( daemon );
           daemon = 0;
       }

       daemon = new KProcess;
       TQ_CHECK_PTR(daemon);
       m_ocrResultText = "";

       /* exit and output of the process are delivered to the gocr slots */
       connect(daemon, TQT_SIGNAL(processExited(KProcess *)),
               this,   TQT_SLOT(  gocrExited(KProcess*)));
       connect(daemon, TQT_SIGNAL(receivedStdout(KProcess *, char*, int)),
               this,   TQT_SLOT(  gocrStdIn(KProcess*, char*, int)));
       connect(daemon, TQT_SIGNAL(receivedStderr(KProcess *, char*, int)),
               this,   TQT_SLOT(  gocrStdErr(KProcess*, char*, int)));

       /* assemble the command line from the dialog settings */
       TQString opt;
       *daemon << TQFile::encodeName(cmd).data();
       *daemon << "-x";
       *daemon << "-";
       if( !( m_img->numColors() > 0 && m_img->numColors() <3 ))   /* not a bw-image */
       {
           /* the gray level from the dialog is only passed for non-bw images */
           *daemon << "-l";
           opt.setNum(gocrDia->getGraylevel());
           *daemon << opt;
       }
       *daemon << "-s";
       opt.setNum(gocrDia->getSpaceWidth());
       *daemon << opt;
       *daemon << "-d";
       opt.setNum(gocrDia->getDustsize());
       *daemon << opt;

       // Write an result image
       *daemon << "-v";
       *daemon << "32";

       // Unfortunately this is fixed by gocr.
       m_ocrResultImage = "out30.bmp";

       *daemon << TQFile::encodeName(tmpFile).data();

       m_ocrCurrLine = 0;  // Important in gocrStdIn to store the results

       /* a start failure is only logged here */
       if (!daemon->start(KProcess::NotifyOnExit, KProcess::All))
       {
           kdDebug(28000) <<  "Error starting daemon!" << endl;
       }
       else
       {
           kdDebug(28000) << "Start OK" << endl;

       }
   }
#ifdef HAVE_KADMOS
   if( m_ocrEngine == KADMOS )
   {
       KadmosDialog *kadDia = static_cast<KadmosDialog*>(m_ocrProcessDia);

       kdDebug(28000)  << "Starting Kadmos OCR Engine" << endl;

       TQString clasPath;  /* target where the clasPath is written in */
       if( ! kadDia->getSelClassifier( clasPath ) )
       {
           /* without a classifier the run is finished as failed right away */
           KMessageBox::error( m_parent,
                               i18n("The classifier file necessary for OCR cannot be loaded: %1;\n"
                                   "OCR with the KADMOS engine is not possible." ).
                               arg(clasPath), i18n("KADMOS Installation Problem"));
           finishedOCRVisible(false);
           return;
       }
       TQCString c = clasPath.latin1();

       kdDebug(28000) << "Using classifier " << c << endl;
       m_rep.Init( c );
       if( m_rep.kadmosError() ) /* check if kadmos initialised OK */
       {
	   /* the error is reported here; the result handling below the
	    * if/else is still reached afterwards */
	   KMessageBox::error( m_parent,
			       i18n("The KADMOS OCR system could not be started:\n") +
			       m_rep.getErrorText()+
			       i18n("\nPlease check the configuration." ),
			       i18n("KADMOS Failure") );
       }
       else
       {
	   /** Since initialising succeeded, we start the ocr here **/
	   m_rep.SetNoiseReduction( kadDia->getNoiseReduction() );
	   m_rep.SetScaling( kadDia->getAutoScale() );
	   kdDebug(28000) << "Image size " << m_img->width() << " x " << m_img->height() << endl;
	   kdDebug(28000) << "Image depth " << m_img->depth() << ", colors: " << m_img->numColors() << endl;
#define USE_KADMOS_FILEOP /* use a save-file for OCR instead of filling the reImage struct manually */
#ifdef USE_KADMOS_FILEOP
	   /* the image is handed to KADMOS through a temporary BMP file; the
	    * KTempFile object is kept in m_tmpFile */
	   m_tmpFile = new KTempFile( TQString(), TQString("bmp"));
	   m_tmpFile->setAutoDelete( false );
	   m_tmpFile->close();
	   TQString tmpFile = m_tmpFile->name();
	   kdDebug() << "Saving to file " << tmpFile << endl;
	   m_img->save( tmpFile, "BMP" );
	   m_rep.SetImage(tmpFile);
#else
	   m_rep.SetImage(m_img);
#endif
	   // rep.Recognize();
	   m_rep.run();

	   /* Dealing with threads or no threads (using TQT_THREAD_SUPPORT to distinguish)
	    * If threads are here, the recognition task is started in its own thread. The gui thread
	    * needs to wait until the recognition thread is finished. Therefore, a timer is fired once
	    * that calls slotKadmosResult and checks if the recognition task is finished. If it is not,
	    * a new one-shot-timer is fired in slotKadmosResult. If it is, the OCR result can be
	    * processed.
	    * In case the system has no threads, the method start of the recognition engine does not
	    * return until it is ready, the user has to live with a non responsive gui while
	    * recognition is performed. The start()-method is implemented as a wrapper to the run()
	    * method of CRep, which does the recognition job. Instead of pulling up a timer, simply
	    * the result slot is called if start()=run() has finished. In the result slot, finished()
	    * is only a dummy always returning true to avoid more preprocessor tags here.
	    * Hope that works ...
	    * It does not :( That is why it is not used here. Maybe some day...
	    */
       }
#ifdef TQT_THREAD_SUPPORT
       /* start a timer and wait until it fires. */
       TQTimer::singleShot( 500, this, TQT_SLOT( slotKadmosResult() ));
#else
       /* no thread support: fetch the result directly */
       slotKadmosResult();
#endif

   }
#endif /* HAVE_KADMOS */
}

/*
 * This method is called to check if the kadmos process was already finished, if
 * thread support is enabled (check for preprocessor variable TQT_THREAD_SUPPORT)
 * The problem is that the kadmos library seems not to be thread stable so thread
 * support should not be enabled by default. In case threads are enabled, this slot
 * checks if the KADMOS engine is finished already and if not it fires a timer.
 */

void KSANEOCR::slotKadmosResult()
{
#ifdef HAVE_KADMOS
    kdDebug(28000) << "check for Recognition finished" << endl;

    if( ! m_rep.finished() )
    {
        /* recognition thread is not yet finished. Wait another half a second.
         * Never comes here if no threads exist on the system. */
        TQTimer::singleShot( 500, this, TQT_SLOT( slotKadmosResult() ));
        return;
    }

    /* The recognition thread is finished. */
    kdDebug(28000) << "kadmos is finished." << endl;

    m_ocrResultText = "";
    if( ! m_rep.kadmosError() )
    {
        const int lineTotal = m_rep.GetMaxLine();
        kdDebug(28000) << "Count lines: " << lineTotal << endl;

        m_ocrPage.clear();
        m_ocrPage.resize( lineTotal );

        /* copy the recognised words line by line into the result page */
        for( int idx = 0; idx < m_rep.GetMaxLine(); idx++ )
        {
            ocrWordList lineWords = m_rep.getLineWords( idx );
            kdDebug(28000) << "Have " << lineWords.count() << " entries in list" << endl;
            m_ocrPage[idx] = lineWords;
        }

        /* all results are fetched, the engine can be shut down */
        m_rep.End();
    }

    finishedOCRVisible( !m_rep.kadmosError() );
#endif /* HAVE_KADMOS */
}




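/*
 * Called when the gocr process has exited. Splits the collected gocr output
 * into lines and words and then finishes the visible OCR run.
 */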
void KSANEOCR::gocrExited(KProcess* d)
{
    kdDebug(28000) << "daemonExited start !" << endl;

    /* The complete gocr output has been collected in m_ocrResultText. It is
     * cut into lines first; m_ocrPage gets one entry per line, and every
     * line is then cut into words at white space.
     */
    TQStringList textLines = TQStringList::split( '\n', m_ocrResultText, true );

    m_ocrPage.clear();
    m_ocrPage.resize( textLines.count() );

    kdDebug(28000) << "RESULT " << m_ocrResultText << " was splitted to lines " << textLines.count() << endl;

    unsigned int pageIdx = 0;
    for( TQStringList::Iterator lineIt = textLines.begin(); lineIt != textLines.end(); ++lineIt, ++pageIdx )
    {
        kdDebug(28000) << "Splitting up line " << *lineIt << endl;

        ocrWordList wordsOfLine;
        TQStringList tokens = TQStringList::split( TQRegExp( "\\s+" ), *lineIt, false );

        for( TQStringList::Iterator tokIt = tokens.begin(); tokIt != tokens.end(); ++tokIt )
        {
            kdDebug(28000) << "Appending to results: " << *tokIt << endl;
            wordsOfLine.append( ocrWord( *tokIt ));
        }

        m_ocrPage[pageIdx] = wordsOfLine;
    }
    kdDebug(28000) << "Finished to split!" << endl;

    /* set the result pixmap to the result pix of gocr */
    const bool pixmapLoaded = m_resPixmap.load( m_ocrResultImage );
    if( ! pixmapLoaded )
    {
        kdDebug(28000) << "Can not load result image!" << endl;
    }

    /* the current image is replaced by the gocr result image */
    delete m_img;
    m_img = new KookaImage();
    m_img->load( "out30.bmp" );

    finishedOCRVisible( d->normalExit() );
}

/*
 *  A sample orf snippet:
 *
 *  # Ocr Results File. Created by GNU ocrad version 0.3pre1
 *  total blocks 2
 *  block 1 0 0 560 344
 *  lines 5
 *  line 1 chars 10 height 26
 *  71 109 17 26;2,'0'1,'o'0
 *  93 109 15 26;2,'1'1,'l'0
 *  110 109 18 26;1,'2'0
 *  131 109 18 26;1,'3'0
 *  151 109 19 26;1,'4'0
 *  172 109 17 26;1,'5'0
 *  193 109 17 26;1,'6'0
 *  213 108 17 27;1,'7'0
 *  232 109 18 26;1,'8'0
 *  253 109 17 26;1,'9'0
 *  line 2 chars 14 height 27
 *
 */

bool KSANEOCR::readORF( const TQString& fileName, TQString& errStr )
{
    /*
     * Parses the ocrad result file fileName into m_ocrPage (one entry per
     * text line, each a list of words with their rectangles) and m_blocks
     * (the rectangles of the text blocks).
     *
     * Returns false and sets errStr if the file does not exist, is not
     * readable or cannot be opened. Malformed entries inside the file are
     * logged and skipped; they do not make the call fail.
     */
    TQFile file( fileName );
    TQRegExp rx;
    bool error = false;

    /* use a global line number counter here, not the one from the orf. The orf one
     * starts at 0 for every block, but we want line-no counting page global here.
     */
    unsigned int lineNo = 0;
    int blockCnt = 0;
    int currBlock = -1;   /* -1: no valid block seen yet */

    /* Fetch the numeric version of ocrad */
    ocradDialog *ocrDia = static_cast<ocradDialog*>(m_ocrProcessDia);
    int ocradVersion = 0;
    if( ocrDia )
    {
        ocradVersion = ocrDia->getNumVersion();
    }

    /* clear the ocr result page */
    m_ocrPage.clear();
    kdDebug(28000) << "***** starting to analyse orf at " << fileName << " *****" << endl;

    /* some  checks on the orf */
    TQFileInfo fi( fileName );
    if( ! fi.exists() ) {
        error = true;
        errStr = i18n("The orf %1 does not exist.").arg(fileName);
    }
    if( ! error && ! fi.isReadable() ) {
        error = true;
        errStr = i18n("Permission denied on file %1.").arg(fileName);
    }
    /* a file that cannot be opened is a failure as well, not an empty result */
    if( ! error && ! file.open( IO_ReadOnly ) ) {
        error = true;
        errStr = i18n("The orf %1 could not be opened.").arg(fileName);
    }

    if( ! error )
    {
        TQTextStream stream( &file );
        TQString line;

        while( !stream.atEnd() )
        {
            line = stream.readLine().stripWhiteSpace(); // line of text excluding '\n'
            const int len = line.length();

            if( line.startsWith( "#" ) )  // Comments
                continue;

            kdDebug(28000) << "# Line check |" << line << "|" << endl;
            if( line.startsWith( "total blocks " ) )  // total count of blocks, must be first line
            {
                blockCnt = line.right( len - 13 /* TQString("total blocks ").length() */ ).toInt();
                if( blockCnt < 0 ) blockCnt = 0;   /* never resize to a negative count */
                kdDebug(28000) << "Amount of blocks: " << blockCnt << endl;
                m_blocks.resize(blockCnt);
            }
            else if( line.startsWith( "total text blocks " ))
            {
                blockCnt = line.right( len - 18 /* TQString("total text blocks ").length() */ ).toInt();
                if( blockCnt < 0 ) blockCnt = 0;   /* never resize to a negative count */
                kdDebug(28000) << "Amount of blocks (V. 10): " << blockCnt << endl;
                m_blocks.resize(blockCnt);
            }
            else if( line.startsWith( "block ") || line.startsWith( "text block ") )
            {
                rx.setPattern("^.*block\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)");
                if( rx.search( line ) > -1)
                {
                    /* the orf counts blocks from 1; the index comes from the
                     * file and must be checked before it is used */
                    const int blockIdx = rx.cap(1).toInt() - 1;
                    if( blockIdx >= 0 && blockIdx < static_cast<int>( m_blocks.size() ) )
                    {
                        currBlock = blockIdx;
                        kdDebug(28000) << "Setting current block " << currBlock << endl;
                        TQRect r( rx.cap(2).toInt(), rx.cap(3).toInt(), rx.cap(4).toInt(), rx.cap(5).toInt());
                        m_blocks[currBlock] = r;
                    }
                    else
                    {
                        kdDebug(28000) << "WRN: block index out of range: " << line << endl;
                        currBlock = -1;
                    }
                }
                else
                {
                    kdDebug(28000) << "WRN: Unknown block line: " << line << endl;
                    // Not a killing bug
                }
            }
            else if( line.startsWith( "lines " ))
            {
                const int lineCnt = line.right( len - 6 /* TQString("lines ").length() */).toInt();
                if( lineCnt > 0 )
                {
                    m_ocrPage.resize(m_ocrPage.size()+lineCnt);
                }
                kdDebug(28000) << "Resized ocrPage to linecount " << lineCnt << endl;
            }
            else if( line.startsWith( "line" ))
            {
                // line 5 chars 13 height 20
                rx.setPattern("^line\\s+(\\d+)\\s+chars\\s+(\\d+)\\s+height\\s+\\d+" );
                if( rx.search( line )>-1 )
                {
                    kdDebug(28000) << "RegExp-Result: " << rx.cap(1) << " : " << rx.cap(2) << endl;
                    const int charCount = rx.cap(2).toInt();
                    ocrWord word;
                    ocrWordList ocrLine;
                    ocrLine.setBlock(currBlock);

                    /* For ocrad versions before 10 the word rectangles are
                     * moved by the position of the current block. The offset
                     * is only applied if a valid block is known. */
                    bool applyBlockOffset = false;
                    int blockX = 0;
                    int blockY = 0;
                    if( ocradVersion < 10 &&
                        currBlock >= 0 && currBlock < static_cast<int>( m_blocks.size() ) )
                    {
                        const TQRect blockRect = m_blocks[currBlock];
                        blockX = blockRect.x();
                        blockY = blockRect.y();
                        applyBlockOffset = true;
                    }

                    /* Loop over all characters in the line. Every char has its own line
                     * defined in the orf file */
                    kdDebug(28000) << "Found " << charCount << " chars for line " << lineNo << endl;

                    for( int c=0; c < charCount && !stream.atEnd(); c++ )
                    {
                        /* Read one line per character */
                        TQString charLine = stream.readLine();
                        const int semiPos = charLine.find(';');
                        if( semiPos == -1 )
                        {
                            kdDebug(28000) << "invalid line: " << charLine << endl;
                            continue;
                        }

                        // rectStr contains the rectangle info for the character
                        // results contains the recognised character alternatives
                        TQString rectStr = charLine.left( semiPos );
                        TQString results = charLine.remove(0, semiPos+1 );
                        bool lineErr = false;

                        // find the amount of alternatives.
                        int altCount = 0;
                        const int h = results.find(',');  // search the first comma
                        if( h > -1 ) {
                            altCount = results.left(h).toInt();
                            results = results.remove( 0, h+1 ).stripWhiteSpace();
                        } else {
                            lineErr = true;
                        }

                        TQChar detectedChar = UndetectedChar;
                        if( !lineErr && altCount > 0 )
                        {
                            /* take the first alternative only FIXME */
                            /* results looks like 'x'... here, so index 1 is the char */
                            detectedChar = results[1];
                        }

                        /* Analyse the rectangle */
                        if( ! lineErr && detectedChar != ' ' )
                        {
                            rx.setPattern( "(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)");
                            if( rx.search( rectStr ) != -1 )
                            {
                                /* unite the rectangles */
                                TQRect privRect( rx.cap(1).toInt(), rx.cap(2).toInt(),
                                                 rx.cap(3).toInt(), rx.cap(4).toInt() );
                                word.setRect( word.rect() | privRect );
                            }
                            else
                            {
                                kdDebug(28000) << "ERR: Unable to read rect info for char!" << endl;
                                lineErr = true;
                            }
                        }

                        if( lineErr )
                            continue;

                        if( detectedChar == ' ' )
                        {
                            /* a space finishes the word: store it */
                            TQRect r = word.rect();
                            if( applyBlockOffset )
                            {
                                r.moveBy( blockX, blockY );
                            }
                            word.setRect( r );
                            ocrLine.append( word );
                            word = ocrWord();
                        }
                        else
                        {
                            word.append( detectedChar );
                        }
                    }

                    /* store the last word of the line, which has no trailing space */
                    if( !word.isEmpty() )
                    {
                        TQRect r = word.rect();
                        if( applyBlockOffset )
                        {
                            r.moveBy( blockX, blockY );
                        }
                        word.setRect( r );

                        ocrLine.append( word );
                    }

                    if( lineNo < m_ocrPage.size() )
                    {
                        kdDebug(29000) << "Store result line no " << lineNo << "=\"" <<
                            ocrLine.first() << "..." << endl;
                        m_ocrPage[lineNo] = ocrLine;
                        lineNo++;
                    }
                    else
                    {
                        kdDebug(28000) << "ERR: line index overflow: " << lineNo << endl;
                    }
                }
                else
                {
                    kdDebug(28000) << "ERR: Unknown line found: " << line << endl;
                }
            }
            else
            {
                kdDebug(29000) << "Unknown line: " << line << endl;
            }
        }
        file.close();
    }
    return !error;
}


void KSANEOCR::cleanUpFiles( void )
{
    if( m_tmpFile )
    {
        delete m_tmpFile;
        m_tmpFile = 0L;
    }

   if( ! m_ocrResultImage.isEmpty())
   {
      kdDebug(28000) << "Unlinking OCR Result image file!" << endl;
      unlink(TQFile::encodeName(m_ocrResultImage));
      m_ocrResultImage = TQString();
   }

   if( ! m_ocrImagePBM.isEmpty())
   {
       kdDebug(28000) << "Unlinking OCR PBM file!" << endl;
       unlink( TQFile::encodeName(m_ocrImagePBM));
       m_ocrImagePBM = TQString();
   }

   if( ! m_tmpOrfName.isEmpty() )
   {
       if( m_unlinkORF )
   {
      unlink(TQFile::encodeName(m_tmpOrfName));
      m_tmpOrfName = TQString();
   }
       else
       {
           kdDebug(28000)  << "Do NOT unlink temp orf file " << m_tmpOrfName << endl;
       }
   }

   /* Delete the debug images of gocr ;) */
   unlink( "out20.bmp" );
}


/*
 * Receives a chunk of text (buflen bytes in buffer, local 8 bit encoding)
 * and writes it to the debug log. The process argument is not used.
 */
void KSANEOCR::gocrStdErr(KProcess*, char* buffer, int buflen)
{
   kdDebug(28000) << "gocr says: " << TQString::fromLocal8Bit(buffer, buflen) << endl;
}


/*
 * Receives a chunk of text (buflen bytes in buffer, local 8 bit encoding).
 *
 * If the chunk starts with two whitespace separated numbers it is treated as
 * a progress report: the first number is emitted as progress, the second as
 * sub progress via ocrProgress(). Every other chunk is appended to
 * m_ocrResultText. The process argument is not used.
 */
void KSANEOCR::gocrStdIn(KProcess*, char* buffer, int buflen)
{
    const TQString aux = TQString::fromLocal8Bit(buffer, buflen);

    /* Both numbers need their own capture group: without groups cap(0) is the
     * whole match (which does not convert to an int) and there is no cap(1).
     */
    TQRegExp rx( "^\\s*(\\d+)\\s+(\\d+)" );
    if( rx.search( aux ) > -1 )
    {
        /* calculate ocr progress for gocr */
        const int progress    = rx.cap(1).toInt();
        const int subProgress = rx.cap(2).toInt();
        // kdDebug(28000) << "Emitting progress: " << progress << endl;
        emit ocrProgress( progress, subProgress );
    }
    else
    {
        m_ocrResultText += aux;
    }

    // kdDebug(28000) << aux << endl;
}

/*
 * Assemble the result text from m_ocrPage: every word is prefixed with a
 * single space and every line is terminated with a newline.
 */
TQString KSANEOCR::ocrResultText()
{
    const TQString space(" ");
    TQString text;

    /* outer loop: the lines of the page */
    TQValueVector<ocrWordList>::iterator currLine = m_ocrPage.begin();
    while( currLine != m_ocrPage.end() )
    {
        /* inner loop: the words of the current line */
        TQValueList<ocrWord>::iterator currWord = (*currLine).begin();
        while( currWord != (*currLine).end() )
        {
            text.append( space + *currWord );
            ++currWord;
        }
        text += "\n";
        ++currLine;
    }

    kdDebug(28000) << "Returning result String  " << text << endl;
    return text;
}


/* --------------------------------------------------------------------------------
 * event filter to filter the mouse events to the image viewer
 */

/*
 * Remembers the image canvas and installs this object as event filter on it,
 * so that eventFilter() sees the events of the canvas.
 *
 * canvas may be null: the pointer is stored as is (the other methods check
 * m_imgCanvas before using it) and no event filter is installed.
 */
void KSANEOCR::setImageCanvas( ImageCanvas *canvas )
{
    m_imgCanvas = canvas;

    if( m_imgCanvas )
        m_imgCanvas->installEventFilter( this );
}


/*
 * Event filter for the image canvas.
 *
 * Only mouse double clicks on m_imgCanvas are handled, and only while
 * m_applyFilter is set. The click position is converted to contents
 * coordinates, corrected by the scale factor of the canvas and then looked
 * up in m_ocrPage: first the line whose rectangle vertically contains the
 * position, then the first word in that line which horizontally contains it.
 * If a word is found, selectWord() is emitted with the line index and word.
 *
 * Returns true for every handled double click (whether or not a word was
 * hit), false for all other events.
 */
bool KSANEOCR::eventFilter( TQObject *object, TQEvent *event )
{
    TQWidget *target = static_cast<TQWidget*>( object );

    const bool forCanvas = m_applyFilter && m_imgCanvas && target == m_imgCanvas;
    if( !forCanvas || event->type() != TQEvent::MouseButtonDblClick )
        return false;

    TQMouseEvent *mev = TQT_TQMOUSEEVENT(event);

    int x = mev->x();
    int y = mev->y();
    int scale = m_imgCanvas->getScaleFactor();

    m_imgCanvas->viewportToContents( mev->x(), mev->y(), x, y );

    kdDebug(28000) << "Clicked to " << x << "/" << y << ", scale " << scale << endl;
    if( scale != 100 )
    {
        // A scale of e.g. 50 means that the image is shown at half size,
        // thus the clicked coords must be multiplied with 2
        y = int(double(y)*100/scale);
        x = int(double(x)*100/scale);
    }

    /* step 1: find the line into which the user clicked */
    int line = 0;
    TQValueVector<ocrWordList>::iterator hitLine = m_ocrPage.end();

    for( TQValueVector<ocrWordList>::iterator it = m_ocrPage.begin();
         it != m_ocrPage.end(); ++it, ++line )
    {
        const TQRect lineRect = (*it).wordListRect();

        if( y > lineRect.top() && y < lineRect.bottom() )
        {
            kdDebug(28000)<< "It is in between " << lineRect.top() << "/" << lineRect.bottom()
                          << ", line " << line << endl;
            hitLine = it;
            break;   /* leaves line at the index of the hit */
        }
    }

    /* step 2: find the first word of that line which contains x */
    bool found = false;
    ocrWord wordToFind;

    if( hitLine != m_ocrPage.end() )
    {
        ocrWordList words = *hitLine;

        for( ocrWordList::iterator wordIt = words.begin(); wordIt != words.end(); ++wordIt )
        {
            const TQRect wordRect = (*wordIt).rect();
            if( x > wordRect.left() && x < wordRect.right() )
            {
                wordToFind = *wordIt;
                found = true;
                break;
            }
        }
    }

    /* step 3: report the word, if there is one */
    if( found )
    {
        kdDebug(28000) << "Found the clicked word " << wordToFind << endl;
        emit selectWord( line, wordToFind );
    }

    return true;
}



/* --------------------------------------------------------------------------------
 * Spellbook support
 */


/*
 * Slot for the corrected() signal of KSpell: originalword was replaced by
 * newword, pos is the position KSpell reports for the word.
 *
 * The word is updated in the current line (m_ocrCurrLine) through
 * slUpdateWord(). If that succeeds and a highlight is active on the image
 * canvas, the highlight is removed, provided m_applyFilter is set.
 */
void KSANEOCR::slSpellCorrected( const TQString& originalword,
                                 const TQString& newword,
                                 unsigned int pos )
{
    kdDebug(28000) << "Corrected: Original Word " << originalword << " was corrected to "
                   << newword << ", pos ist " << pos << endl;

    kdDebug(28000) << "Dialog state is " << m_spell->dlgResult() << endl;

    if( ! slUpdateWord( m_ocrCurrLine, pos, originalword, newword ) )
        return;

    const bool haveHighlight = m_imgCanvas && m_currHighlight > -1;
    if( ! haveHighlight )
    {
        kdDebug(28000) << "No highlighting to remove!" << endl;
        return;
    }

    if( m_applyFilter )
        m_imgCanvas->removeHighlight( m_currHighlight );
}


void KSANEOCR::slSpellIgnoreWord( const TQString& word )
{
    ocrWord ignoreOCRWord;

    ignoreOCRWord = ocrWordFromKSpellWord( m_ocrCurrLine, word );
    if( ! ignoreOCRWord.isEmpty() )
    {
        emit ignoreWord( m_ocrCurrLine, ignoreOCRWord );

        if( m_imgCanvas && m_currHighlight > -1 )
        {
            m_imgCanvas->removeHighlight( m_currHighlight );

            /* create a new highlight. That will never be removed */
            TQBrush brush;
            TQPen pen( gray, 1 );
            TQRect r = ignoreOCRWord.rect();
            r.moveBy(0,2);  // a bit offset to the top

            if( m_applyFilter )
                m_imgCanvas->highlight( r, pen, brush );
        }
    }
}

/*
 * Returns the ocr word of line line which findFuzzyIndex() delivers for
 * word. If line is not a valid line index, a default constructed ocrWord
 * is returned.
 */
ocrWord KSANEOCR::ocrWordFromKSpellWord( int line, const TQString& word )
{
    ocrWord match;

    if( ! lineValid(line) )
        return match;

    /* the search works on a copy of the line */
    ocrWordList lineWords = m_ocrPage[line];
    lineWords.findFuzzyIndex( word, match );

    return match;
}


bool KSANEOCR::lineValid( int line )
{
    bool ret = false;

    if( line >= 0 && (uint)line < m_ocrPage.count() )
        ret = true;

    return ret;
}

void KSANEOCR::slMisspelling( const TQString& originalword, const TQStringList& suggestions,
                              unsigned int pos )
{
    /* for the first try, use the first suggestion */
    ocrWord s( suggestions.first());
    kdDebug(28000) << "Misspelled: " << originalword << " at position " << pos << endl;

    int line = m_ocrCurrLine;
    m_currHighlight = -1;

    // ocrWord resWord = ocrWordFromKSpellWord( line, originalword );
    ocrWordList words = m_ocrPage[line];
    ocrWord resWord;
    kdDebug(28000) << "Size of wordlist (line " << line << "): " << words.count() << endl;

    if( pos < words.count() )
    {
        resWord = words[pos];
    }

    if( ! resWord.isEmpty() )
    {
        TQBrush brush;
        brush.setColor( TQColor(red)); // , "Dense4Pattern" );
        brush.setStyle( Qt::Dense4Pattern );
        TQPen pen( red, 2 );
        TQRect r = resWord.rect();

        r.moveBy(0,2);  // a bit offset to the top

        if( m_applyFilter )
            m_currHighlight = m_imgCanvas->highlight( r, pen, brush, true );

        kdDebug(28000) << "Position ist " << r.x() << ", " << r.y() << ", width: "
		       << r.width() << ", height: " << r.height() << endl;

        /* draw a line under the word to check */

        /* copy the source */
        emit repaintOCRResImage();
    }
    else
    {
        kdDebug(28000) << "Could not find the ocrword for " << originalword << endl;
    }

    emit markWordWrong( line, resWord );
}

/*
 * This is the global starting point for spell checking of the ocr result.
 * After the KSpell object was created in method finishedOCRVisible, this
 * slot is called if the KSpell-object feels itself ready for operation.
 *
 * The slot stores the spell checker in m_spell, connects its misspelling(),
 * corrected(), ignoreword() and done() signals to the slots of this class,
 * optionally hides the ocr process dialog, switches the editor to read only
 * and then starts the spell check in a line by line manner.
 */
void KSANEOCR::slSpellReady( KSpell *spell )
{
    m_spell = spell;
    /* a misspelled word was found */
    connect ( m_spell, TQT_SIGNAL( misspelling( const TQString&, const TQStringList&,
                                            unsigned int )),
              this, TQT_SLOT( slMisspelling(const TQString& ,
                                        const TQStringList& ,
                                        unsigned int  )));
    /* a word was replaced by a correction */
    connect( m_spell, TQT_SIGNAL( corrected ( const TQString&, const TQString&, unsigned int )),
             this, TQT_SLOT( slSpellCorrected( const TQString&, const TQString&, unsigned int )));

    /* a word is to be ignored */
    connect( m_spell, TQT_SIGNAL( ignoreword( const TQString& )),
             this, TQT_SLOT( slSpellIgnoreWord( const TQString& )));

    /* the check of one list of words is finished */
    connect( m_spell, TQT_SIGNAL( done(bool)), this, TQT_SLOT(slCheckListDone(bool)));

    kdDebug(28000) << "Spellcheck available" << endl;

    /* the process dialog is shown again in slSpellDead */
    if( m_ocrProcessDia && m_hideDiaWhileSpellcheck )
        m_ocrProcessDia->hide();
    /* the editor is made writable again in slSpellDead */
    emit readOnlyEditor( true );
    startLineSpellCheck();
}

/**
 * slot called after either the spellcheck finished or the KSpell object found
 * out that it does not want to run because of whatever problems came up.
 * If it is an KSpell-init problem, the m_spell variable is still zero and
 * Kooka pops up a warning.
 */
void KSANEOCR::slSpellDead()
{
    if( ! m_spell )
    {
        kdDebug(28000) << "Spellcheck NOT available" << endl;
        /* Spellchecking has not yet been existing, thus there is a base problem with
         * spellcheck on this system.
         */
        KMessageBox::error( m_parent,
                            i18n("Spell-checking cannot be started on this system.\n"
                                 "Please check the configuration" ),
                            i18n("Spell-Check") );

    }
    else
    {
        if( m_spell->status() == KSpell::Cleaning )
        {
            kdDebug(28000) << "KSpell cleans up" << endl;
        }
        else if( m_spell->status() == KSpell::Finished )
        {
            kdDebug(28000) << "KSpell finished" << endl;
        }
        else if( m_spell->status() == KSpell::Error )
        {
            kdDebug(28000) << "KSpell finished with Errors" << endl;
        }
        else if( m_spell->status() == KSpell::Crashed )
        {
            kdDebug(28000) << "KSpell Chrashed" << endl;
        }
        else
        {
            kdDebug(28000) << "KSpell finished with unknown state!" << endl;
        }

        /* save the current config */
        delete m_spell;
        m_spell = 0L;

        /* reset values */
        m_checkStrings.clear();
        m_ocrCurrLine = 0;
        if( m_imgCanvas && m_currHighlight > -1 )
            m_imgCanvas->removeHighlight( m_currHighlight );

    }
    if( m_ocrProcessDia )
        m_ocrProcessDia->show();
    emit readOnlyEditor( false );
}


/**
 * Slot for the done() signal of KSpell; the bool argument is not used.
 *
 * The texts need no update here, that already happened in slSpellCorrected.
 * Depending on the result of the spell dialog, the spell check is either
 * stopped (cancel or stop) or continued with the next line.
 */
void KSANEOCR::slCheckListDone(bool)
{
    /* Check the dialog state here */
    const int dlgState = m_spell->dlgResult();
    const bool stopRequested = ( dlgState == KS_CANCEL || dlgState == KS_STOP );

    if( stopRequested )
    {
        /* stop processing */
        m_spell->cleanUp();
        return;
    }

    /* go on with the next line */
    ++m_ocrCurrLine;
    kdDebug(28000) << "Starting spellcheck from CheckListDone" << endl;
    startLineSpellCheck();
}

/**
 * updates the word at position spellWordIndx in line line to the new word newWord.
 * The original word was origWord. This slot is called from slSpellCorrected
 *
 */
bool KSANEOCR::slUpdateWord( int line, int spellWordIndx, const TQString& origWord,
                             const TQString& newWord )
{
    bool result = false;

    if( lineValid( line ))
    {
        ocrWordList words = m_ocrPage[line];
        kdDebug(28000) << "Updating word " << origWord << " to " << newWord << endl;

        if( words.updateOCRWord( words[spellWordIndx] /* origWord */, newWord ) )  // searches for the word and updates
        {
            result = true;
            emit updateWord( line, origWord, newWord );
        }
        else
            kdDebug(28000) << "WRN: Update from " << origWord << " to " << newWord << " failed" << endl;

    }
    else
    {
        kdDebug(28000) << "WRN: Line " << line << " no not valid!" << endl;
    }
    return result;
}


/* Definition of the static member UndetectedChar, initialised to an
 * underscore. NOTE(review): presumably the placeholder for characters the
 * ocr engine could not detect - confirm against its uses.
 */
char KSANEOCR::UndetectedChar = '_';

/* -- */
#include "ksaneocr.moc"