diff options
Diffstat (limited to 'khotkeys/shared/voicesignature.cpp')
-rw-r--r-- | khotkeys/shared/voicesignature.cpp | 422 |
1 files changed, 422 insertions, 0 deletions
diff --git a/khotkeys/shared/voicesignature.cpp b/khotkeys/shared/voicesignature.cpp new file mode 100644 index 000000000..4a1e12880 --- /dev/null +++ b/khotkeys/shared/voicesignature.cpp @@ -0,0 +1,422 @@ +/*************************************************************************** + * Copyright (C) 2005 by Olivier Goffart * + * ogoffart@kde.org * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * + ***************************************************************************/ +#include "voicesignature.h" +#include "sound.h" +#include <tdeconfig.h> + +#include <math.h> +#ifdef PI +#undef PI +#endif +#define PI (2.0 * asin(1.0)) + + +#include <kdebug.h> +#include <tqdatetime.h> + +#undef Complex + +namespace KHotKeys +{ + + +inline static float ABS(float X) +{ + return (X>0) ? X : -X ; +} +inline static int MAX(int X , int Y) +{ + return (X>Y) ? X : Y ; +} +inline static int MIN(int X , int Y) +{ + return (X<Y) ? X : Y ; +} + + + + + + +class Complex +{ + public: + Complex () {} + Complex (double re): _re(re), _im(0.0) {} + Complex (double re, double im): _re(re), _im(im) {} + double Re () const { return _re; } + double Im () const { return _im; } + void operator += (const Complex& c) + { + _re += c._re; + _im += c._im; + } + void operator -= (const Complex& c) + { + _re -= c._re; + _im -= c._im; + } + void operator *= (const Complex& c) + { + double reT = c._re * _re - c._im * _im; + _im = c._re * _im + c._im * _re; + _re = reT; + } + Complex operator- () + { + return Complex (-_re, -_im); + } + Complex operator- (const Complex& c) const + { + return Complex (_re - c._re, _im - c._im); + } + Complex operator+ (const Complex& c) const + { + return Complex (_re + c._re, _im + c._im); + } + Complex operator* (const Complex& c) const + { + return Complex (_re * c._re - _im * c._im , _im * c._re + _re * c._im); + } + double Mod () const { return sqrt (_re * _re + _im * _im); } + + static Complex fromExp(double mod, double arg) { return Complex(mod*cos(arg) , mod*sin(arg)); } + private: + double _re; + double _im; +}; + +static inline double hamming(uint n, uint size) +{ + return HAMMING ? 0.54-0.46*cos( 2*PI*n /(size-1) ) : 1; +} + + +static TQMemArray<double> fft(const Sound& sound, unsigned int start, unsigned int stop) +{ + if(start>=stop || sound.size() == 0) + return TQMemArray<double>(); + + //We need a sample with a size of a power of two + uint size=stop-start; + unsigned short log2size=0; + while( (1<<log2size) < size ) + log2size++; + + int diff=(1<<log2size) - size; + if(diff > size/4 || 1<<log2size > sound.size() ) + { + log2size--; + diff=(1<<log2size) - size; + } + size=1<<log2size; + int start2=start-diff/2; + int stop2=start2+ size; + if(start2<0) + { + stop2-=start2; + start2=0; + } + if(stop2>sound.size()) + { + start2-= stop2 - sound.size(); + stop2=sound.size(); + if(start2<0) + { + stop2-=start2; + start2=0; + } + } + + //Generate an array to work in + TQMemArray<Complex> samples(size); + + //Fill it with samples in the "reversed carry" order + int rev_carry = 0; + for (uint f = 0; f < size - 1; f++) + { + samples[f]=sound.at(start2+rev_carry)* hamming(rev_carry, size); +// KDEBUG(rev_carry); + int mask = size>>1; // N / 2 + // add 1 backwards + while (rev_carry >= mask) + { + rev_carry -= mask; // turn off this bit + mask >>= 1; + } + rev_carry += mask; + } + samples[size-1]=sound.at(start2+size-1)*hamming(size-1, size); + + //FFT + for(uint level=0; level < log2size; level++) + { + for( int k=0; k< (size>>1) ; k++) + { + uint indice1 = (k << (level+1) ) % (size-1); // (k*2*2^l)%(N-1) + uint indice2 = indice1 + (1<<level); // (k*2*2^l)%(N-1) + 2^l + + uint coefW = ( k << (level+1) ) / (size-1); // (k*2*2^l) div (N-1) + double Wexpn=-2 * PI * coefW / (2 << level); // -2 pi n / 2^(l+1) + Complex W=Complex::fromExp(1, Wexpn) ; + + + //OPERATION BUTTERFLY + Complex a=samples[indice1]; + Complex b=samples[indice2]; + samples[indice1]=a+W*b; + samples[indice2]=a-W*b; + +// kdDebug() << k_funcinfo << "PAPILLON s_" << indice1 << " s_" << indice2 << " W_" << (2<<level) << "^" << coefW << endl; + } + } + + TQMemArray<double> result(size); + for(uint f=0;f<size;f++) + { + result[f]=samples[f].Mod() / size; + + } + return result; +} + + + + +TQMemArray<double> VoiceSignature::fft(const Sound& sound, unsigned int start, unsigned int stop) +{ + return KHotKeys::fft(sound, start, stop); + /*TQMemArray<double> result(8000); + for(int f=0; f<8000;f++) + { + Complex c(0); + + for(uint x=start; x<stop; x++) + { + Complex s(sound.at(x)); + double angle=-2*PI*f*x/8000; + s*= Complex( cos(angle) , sin(angle) ); + c+=s; + } + result[f]= c.Mod()/(stop-start) ; + } + return result;*/ +} + +bool VoiceSignature::window(const Sound& sound, unsigned int *_start, unsigned int *_stop) +{ + bool isNoise=false; + unsigned int length=sound.size(); + uint unit=WINDOW_UNIT; + if(length < unit ) + return false; + + //Fen�trage + unsigned int start=0 , stop=0; + double moy=0; + for(uint x=0;x<unit;x++) + { + moy+=ABS(sound.at(x)); + } + + if(moy>WINDOW_MINIMUM*unit) + isNoise=true; + + for(uint x=unit; x<length; x++) + { + if(moy<WINDOW_MINIMUM*unit) + { + if(stop==0) + start=x-unit/2; + } + else + stop=x-unit/2; + moy+=ABS(sound.at(x)); + moy-=ABS(sound.at(x-unit)); + + } + + if(moy>WINDOW_MINIMUM*unit && isNoise) + return false; + + stop=MIN(length,stop+WINDOW_MINIMUM_ECART); + start=MAX(0 ,start-WINDOW_MINIMUM_ECART); + + if(_start) + *_start=start; + if(_stop) + *_stop=stop; + return start<stop; +} + +//finally doesn't give better results +/*#define HZ_TO_MEL(F) (1127*log(1+(F)/700.0)) +#define MEL_TO_HZ(M) ( ( exp((M)/1127.0) -1) *700 )*/ +#define HZ_TO_MEL(F) (F) +#define MEL_TO_HZ(F) (F) + + +VoiceSignature::VoiceSignature(const Sound& sound) +{ + static uint temp_wind=0, temp_fft=0, temp_moy=0; + TQTime t; + t.start(); + + unsigned int start , stop; + if(!window(sound,&start,&stop)) + { + kdWarning( 1217 ) << k_funcinfo << "No voice found in the sound" << endl ; + return; + } + + temp_wind+=t.restart(); + + uint length=stop-start; + + for(int wind=0; wind<WINDOW_NUMBER; wind++) + { + unsigned int w_start=MAX(start, start+ (int)((wind - WINDOW_SUPER)*length/WINDOW_NUMBER)); + unsigned int w_stop =MIN(stop , start+ (int)((wind+1.0+WINDOW_SUPER)*length/WINDOW_NUMBER)); + + + TQMemArray<double> fourrier=fft(sound, w_start,w_stop); + + temp_fft+=t.restart(); + + //MEL conversion + double mel_start=HZ_TO_MEL(FFT_RANGE_INF); + uint mel_stop=HZ_TO_MEL(FFT_RANGE_SUP); + + for(int four=0; four<FOUR_NUMBER; four++) + { + unsigned int wf_start=mel_start + four*(mel_stop-mel_start)/FOUR_NUMBER; + unsigned int wf_stop=mel_start + (four+1)*(mel_stop-mel_start)/FOUR_NUMBER; + + unsigned int f_start=MEL_TO_HZ( wf_start )*fourrier.size()/sound.fs(); + unsigned int f_stop=MEL_TO_HZ( wf_stop )*fourrier.size()/sound.fs(); + unsigned int f_size=f_stop-f_start; + + double nb=0; + for(uint f=f_start; f<f_stop; f++) + { + int freq=f*fourrier.size()/sound.fs(); + nb+=fourrier[f]*FFT_PONDERATION(freq); + } + nb/=(f_size); + data[wind][four]=nb; + } + + temp_moy+=t.restart(); + + } + +// kdDebug( 1217 ) << k_funcinfo << "wind: "<< temp_wind << " - fft: " << temp_fft << " - moy: " << temp_moy << endl; +} + + + +VoiceSignature::~VoiceSignature() +{ +} + + + +float VoiceSignature::diff(const VoiceSignature &s1, const VoiceSignature &s2) +{ + if(s1.isNull() || s2.isNull()) + return 1000000; +#if 0 + double result=0; + for(int x=0;x<WINDOW_NUMBER;x++) + for(int y=0;y<FOUR_NUMBER;y++) + { + double d1=s1.data[x][y]-s2.data[x][y]; + result+= d1*d1;//*pond[x][y]; + } + return result; +#endif + + //DTW + // http://tcts.fpms.ac.be/cours/1005-08/speech/projects/2001/delfabro_henry_poitoux/ + + const int I=WINDOW_NUMBER; + const int J=WINDOW_NUMBER; + double g[I+1][J+1]; + for(int f=1;f<=J;f++) + g[0][f]=10000000; + for(int f=1;f<=I;f++) + g[f][0]=10000000; + g[0][0]=0; + for(int i=1;i<=I;i++) + for(int j=1;j<=J;j++) + { + double d=0; + for(int f=0;f<FOUR_NUMBER;f++) + { + double d1=s1.data[i-1][f]-s2.data[j-1][f]; + d+= d1*d1;//*pond[x][y]; + } + d=sqrt(d); + g[i][j]=TQMIN(TQMIN( g[i-1][j]+d, g[i][j-1]+d ) , g[i-1][j-1]+d+d ); + } + + return g[I][J]/(I+J); +} + + + + + +int VoiceSignature::size1() +{ + return WINDOW_NUMBER; +} + +int VoiceSignature::size2() +{ + return FOUR_NUMBER; +} + +TQMap<int, TQMap<int, double> > VoiceSignature::pond; + + + +void VoiceSignature::write(TDEConfigBase *cfg, const TQString &key) const +{ + TQStringList sl; + for(int x=0;x<WINDOW_NUMBER;x++) + for(int y=0;y<FOUR_NUMBER;y++) + { + sl.append( TQString::number(data[x][y]) ); + } + cfg->writeEntry(key,sl); +} + +void VoiceSignature::read(TDEConfigBase *cfg, const TQString &key) +{ + TQStringList sl=cfg->readListEntry(key); + for(int x=0;x<WINDOW_NUMBER;x++) + for(int y=0;y<FOUR_NUMBER;y++) + { + data[x][y]= sl[x*FOUR_NUMBER+y].toDouble(); + } +} + +} |