// // C++ Implementation: algorithms // // Description: // // // Author: Andrea Rizzi , (C) 2003 // // Copyright: See COPYING file that comes with this distribution // // #include "algorithms.h" #include #include //FIXME: remove #define i18n (const char*) DataBaseInterface::ResultList ExactSearchAlgorithm::exec(const TQString& query ) { DataBaseInterface::ResultList res; DataBaseInterface::MainEntry e=di->get(query,0); TQStringList trs=e.second.getTranslations(); for(TQStringList::iterator it=trs.begin();it!=trs.end();++it) { emit newResult(QueryResult(*it,e.first.getString(),settings->scoreExact)); res.push_back(QueryResult(*it)); } kdDebug(0) <<"Exact algo found " << res.count() << "entries" << endl; return res; } DataBaseInterface::ResultList GenericSearchAlgorithm::exec(const TQString& query ) { DataBaseInterface::ResultList res; // ExactSearchAlgorithm exact(query,settings); uint countResults=0; for(TQValueList::iterator algoit = algoChain.begin(); algoit!=algoChain.end() && countResults < maxResults; algoit++) { connect(*algoit,TQ_SIGNAL(newResult(QueryResult)),this,TQ_SIGNAL(newResult(QueryResult))); kdDebug(0) << "Algo pointer" << (*algoit) << endl; res+=(*algoit)->exec(query); countResults=res.count(); kdDebug(0) << "Count = " << countResults << endl; disconnect(*algoit,TQ_SIGNAL(newResult(QueryResult)),this,TQ_SIGNAL(newResult(QueryResult))); } return res; } void GenericSearchAlgorithm::addAlgorithm( AbstractSearchAlgorithm * algo ) { algoChain.append(algo); } DataBaseInterface::ResultList AlphaSearchAlgorithm::exec( const TQString & query ) { DataBaseInterface::ResultList res; DBItemMultiIndex::IndexList il=di->getAlpha(query); for(DBItemMultiIndex::IndexList::iterator it=il.begin();it!=il.end()&&!di->stopNow();++it) { DataBaseInterface::MainEntry e=di->getFromIndex(*it); TQStringList trs=e.second.getTranslations(); for(TQStringList::iterator it=trs.begin();it!=trs.end() && !di->stopNow();++it) { QueryResult r(di->format(di->simple(*it,true),query),e.first.getString(),settings->scoreAlpha); emit newResult(r); res.push_back(r); } } kdDebug(0) <<"Alpha algo found " << res.count() << "entries" << endl; return res; } DataBaseInterface::ResultList SentenceArchiveSearchAlgorithm::exec( const TQString & query ) { DataBaseInterface::ResultList res; DataBaseInterface::MainEntry e = di->getSentence(query); TQStringList trs=e.second.getTranslations(); kdDebug(0) << "Count in sentence archive " << trs.count()<< endl; for(TQStringList::iterator it=trs.begin();it!=trs.end();++it) { QueryResult r(di->format(di->simple(*it,true),query),e.first.getString(),settings->scoreSentence); emit newResult(r); res.push_back(r); } kdDebug(0) <<"Sentence algo found " << res.count() << "entries" << endl; return res; } DataBaseInterface::ResultList ChunkByChunkSearchAlgorithm::exec( const TQString & query ) { ResultList res; factory->setQuery(query); TQPtrList chunks=factory->chunks(); kdDebug(0) << "Number of chunks " << chunks.count() << endl; chunks.setAutoDelete(true); //I should delete the chunks myself TQStringList querySeparators=factory->separators(); //This prevents recursive loop. if (chunks.count()<=1) return res; TQStringList translations,tmpTranslations; translations.push_back(""); //FIXME this is needed to start , but is not good int finalscore=0; int i=0; TQMap translationUsed; //Loop on all chunk for(AbstractChunk *it=chunks.first();it && !di->stopNow(); it=chunks.next()) { kdDebug(0) << "Process next chunk" << endl; int chunkscore=0; TQValueList r=it->translations(); kdDebug(0) << "Number of results for this chunk " << r.count() << endl; if(r.count()<1) { // kdDebug(0) << "Nothing found for:" << it->translations() << endl; chunkscore=-10; } else { //FIXME: check this, why 0? it is the best one? chunkscore=r[0].score(); kdDebug(0) << "ChunkScore " << chunkscore << endl; tmpTranslations.clear(); //Loop on results translationUsed.clear(); for(ResultList::iterator it1=r.begin();it1!=r.end() &&!di->stopNow(); ++it1) { TQString chunkTranslation= (*it1).result(); if(!translationUsed.contains(chunkTranslation)) { translationUsed[chunkTranslation]=true; kdDebug(0) << "a translation is: " << chunkTranslation << endl; for(TQStringList::iterator it2=translations.begin();it2!=translations.end() && !di->stopNow() ; it2++) { TQString prevTranslation=*it2; tmpTranslations.push_back(prevTranslation+chunkTranslation+querySeparators[i]); kdDebug(0) << "..appending it to " << prevTranslation << endl; } } } translations=tmpTranslations; } //kdDebug(0) << it-> << r[0].result() << "#" << querySeparators[i] << endl; i++; finalscore+=chunkscore; kdDebug(0) << "partial score " << finalscore; } kdDebug(0) << "this is finishd" << endl; if(settings->scoreChunkByChunk==0) settings->scoreChunkByChunk=1; // FIXME:fix the score system // finalscore/=(i*100*100/settings->scoreChunkByChunk); //change 100 to 120(?) to lower this result (done) if (finalscore<50) return res; for(TQStringList::iterator it2=translations.begin();it2!=translations.end() && !di->stopNow() ; it2++) { TQString theTranslation=*it2; QueryResult qr(di->format(theTranslation,query),i18n("CHUNK BY CHUNK"),finalscore); qr.setRichOriginal(i18n("

Chunk by chunk

CHANGE THIS TEXT!!!!This translation is" "obtained translating the sentences and using a" "fuzzy sentence translation database.
" " Do not rely on it. Translations may be fuzzy.
")); qr.setRichResult(""+theTranslation+"") ; emit newResult(qr); res.push_back(qr); } return res; } ChunkByChunkSearchAlgorithm::ChunkByChunkSearchAlgorithm( DataBaseInterface * dbi, DBSESettings * sets ): AbstractSearchAlgorithm(dbi,sets) , factory(0) { } SentenceArchiveSearchAlgorithm::SentenceArchiveSearchAlgorithm( DataBaseInterface * dbi, DBSESettings * sets ): AbstractSearchAlgorithm(dbi,sets) { } FuzzyChunkSearchAlgorithm::FuzzyChunkSearchAlgorithm( DataBaseInterface * dbi, DBSESettings * sets ) : AbstractSearchAlgorithm(dbi,sets) { } DataBaseInterface::ResultList FuzzyChunkSearchAlgorithm::exec( const TQString & query ) { //FIXME: this code is shit too ResultList res; factory->setQuery(query); TQPtrList querychunks = factory->chunks(); querychunks.setAutoDelete(true); typedef TQMap > ResultMap; ResultMap rmap; //result of words index query unsigned int notfound=0,frequent=0,nchunks = querychunks.count(); //Get index list for each word for(AbstractChunk *it=querychunks.first(); it &&!di->stopNow() ; it=querychunks.next() ) { TQValueList locations = (*it).locationReferences(); if(locations.count()>0) { rmap[(*it).chunkString()] = locations; if(locations.count()>1000) //FIXME NORMALIZE THIS!!! { frequent++; kdDebug(0) << "\""<<(*it).chunkString() << "\" is frequent" <list of occurency" TQValueList::iterator countpos[nchunks+1]; TQValueList il; for(int i = 0;i<=nchunks&&!di->stopNow();i++) countpos[i]=il.end(); unsigned int bestcount=0; while(!rmap.isEmpty()) { unsigned int ref,count; ref=(unsigned int)-1; count=0; //This will find the min head and count how many times it occurs for(ResultMap::iterator it = rmap.begin();it!=rmap.end()&&!di->stopNow();++it) { unsigned int thisref=it.data().first(); if(thisrefstopNow();) { it.data().remove(ref); //kdDebug(0)<< ((frequent<(nwords-notfound)) && (it.data().count()>350)) <1000))) //very dirty hack... { ResultMap::iterator it2=it; it++; rmap.remove(it2); } else it++; } //This should be configurable or optimized: if(count>=(nchunks-notfound)*0.50 && count!=0) { il.insert(countpos[count],ref); for(unsigned int i = nchunks;i>=count;i--) if(countpos[i]==countpos[count]) countpos[i]--; } } //loop on number of words found int bestscore=0; for(unsigned int wf=nchunks;wf>0;wf-- ){ for(TQValueList::iterator it=countpos[wf];it!=countpos[wf-1] ;++it) { //loop on entries with same number of word found DataBaseInterface::MainEntry e; e=di->getFromIndex(*it); TQStringList trs=e.second.getTranslations(); for(TQStringList::iterator it=trs.begin();it!=trs.end()&&!di->stopNow();++it) { unsigned int cinr=factory->chunks(*it).count(); //chunk in result //compute a score, lets kbabel sort now, it should be fast... int score=90*wf/nchunks-(signed int)90*(((nchunks-cinr)>0)?(nchunks-cinr):(cinr-nchunks))/(nchunks*10); if(score>bestscore) bestscore=score; if(score>bestscore*0.40) { // kdDebug(0) << "s: "<0)?(nwords-winr):(winr-nwords))/(nwords*10)<< endl; // FIXME: format better the richtext TQString ori=e.first.getString(); TQString re=di->format(di->simple(*it,true),query); QueryResult r(re,ori,score); for(TQPtrListIterator it(querychunks); it.current() && di->stopNow() ; ++it){ ori=ori.replace(TQRegExp((*it)->chunkString(),false),""+(*it)->chunkString()+""); } r.setRichOriginal(ori); if(!di->stopNow()) emit newResult(r); res.push_back(r); } } } } return res; } DataBaseInterface::ResultList CorrelationSearchAlgorithm::exec( const TQString & query ) { //FIXME, this code is shit. DataBaseInterface::ResultList res; if(di->words(query).count()>1) return res; TQMap corRes = di->correlation(query,0,false); float max=0,max1=0,max2=0; TQString best,best1,best2; for(TQMap::iterator it = corRes.begin(); it !=corRes.end(); ++it) { if(it.data()>max) { max2=max1; best2=best1; max1=max; best1=best; best = it.key(); max=it.data(); } } if(!best.isEmpty()) { double myscore=0.01*max*settings->scoreDynamic; QueryResult r(di->format(best,query),i18n("DYNAMIC DICT:"),myscore); r.setRichOriginal(i18n("

Dynamic Dictionary

This is a dynamic dictionary created" " looking for correlation of original and translated words.
" " Do not rely on it. Translations may be fuzzy.
")); r.setRichResult(""+di->format(best,query)+"") ; res.push_back(r); if(!di->stopNow()) emit newResult(r); } if(!best1.isEmpty()) { double myscore=0.01*max1*settings->scoreDynamic; QueryResult r(di->format(best1,query),i18n("DYNAMIC DICT:"),myscore); r.setRichOriginal(i18n("

Dynamic Dictionary

This is a dynamic dictionary created" " looking for correlation of original and translated words.
" " Do not rely on it. Translations may be fuzzy.
")); r.setRichResult(""+di->format(best1,query)+"") ; res.push_back(r); if(!di->stopNow()) emit newResult(r); } kdDebug(0) << "Correlation algorithm found" << res.count() << "results"; return res; } GenericSearchAlgorithm::GenericSearchAlgorithm( DataBaseInterface * dbi, DBSESettings * sets ): AbstractSearchAlgorithm(dbi,sets) { maxResults = 5; //FIXME use as default somthing from DBSESettings } SingleWordSearchAlgorithm::SingleWordSearchAlgorithm( DataBaseInterface * dbi, DBSESettings * sets ) : GenericSearchAlgorithm(dbi,sets), exact(dbi,sets), alpha(dbi,sets), sentence(dbi,sets), corr(dbi,sets), chunk(dbi,sets),casefactory(dbi) { addAlgorithm(&exact); addAlgorithm(&alpha); addAlgorithm(&sentence); chunk.setChunkFactory(&casefactory); addAlgorithm(&chunk); addAlgorithm(&corr); } DataBaseInterface::ResultList SingleWordSearchAlgorithm::exec( const TQString & query ) { if(di->words(query).count()>1) return ResultList(); return GenericSearchAlgorithm::exec(query); } //#include "algorithms.moc"