author    toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>  2009-11-25 17:56:58 +0000
committer toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>  2009-11-25 17:56:58 +0000
commit    460c52653ab0dcca6f19a4f492ed2c5e4e963ab0 (patch)
tree      67208f7c145782a7e90b123b982ca78d88cc2c87 /indexlib/tests/tokenizer-test.cpp
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features.
BUG:215923
git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdepim@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'indexlib/tests/tokenizer-test.cpp')
-rw-r--r--  indexlib/tests/tokenizer-test.cpp | 69
1 file changed, 69 insertions(+), 0 deletions(-)
diff --git a/indexlib/tests/tokenizer-test.cpp b/indexlib/tests/tokenizer-test.cpp
new file mode 100644
index 000000000..372859d90
--- /dev/null
+++ b/indexlib/tests/tokenizer-test.cpp
@@ -0,0 +1,69 @@
+#include <boost/test/unit_test.hpp>
+#include "tokenizer.h"
+#include <cassert>
+
+using namespace ::boost::unit_test;
+namespace indexlib { namespace tests { namespace tokenizer_test {
+
+using indexlib::detail::tokenizer;
+using indexlib::detail::get_tokenizer;
+
+void simple() {
+	std::auto_ptr<tokenizer> tokenizer = get_tokenizer( "latin-1:european" );
+	assert(tokenizer.get());
+	std::vector<std::string> tokens = tokenizer->string_to_words( "one ,as, ''#`:ThReE, ááàçé" );
+	std::vector<std::string> expected;
+	expected.push_back( "ONE" );
+	expected.push_back( "AS" );
+	expected.push_back( "THREE" );
+	expected.push_back( "AAACE" );
+	std::sort( tokens.begin(), tokens.end() );
+	std::sort( expected.begin(), expected.end() );
+	BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+	for ( int i = 0; i < expected.size() && i < tokens.size(); ++i ) {
+		BOOST_CHECK_EQUAL( expected[ i ], tokens[ i ] );
+	}
+}
+
+void with_newlines() {
+	std::auto_ptr<tokenizer> tokenizer = get_tokenizer( "latin-1:european" );
+	assert(tokenizer.get());
+	std::vector<std::string> tokens = tokenizer->string_to_words( "one\ntwo\nthree" );
+	std::vector<std::string> expected;
+	expected.push_back( "ONE" );
+	expected.push_back( "TWO" );
+	expected.push_back( "THREE" );
+	std::sort( tokens.begin(), tokens.end() );
+	std::sort( expected.begin(), expected.end() );
+	BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+	for ( int i = 0; i < expected.size() && i < tokens.size(); ++i ) {
+		BOOST_CHECK_EQUAL( expected.at( i ), tokens.at( i ) );
+	}
+}
+
+void with_numbers() {
+	std::auto_ptr<tokenizer> tokenizer = get_tokenizer( "latin-1:european" );
+	assert(tokenizer.get());
+	std::vector<std::string> tokens = tokenizer->string_to_words( "one 012 123 four" );
+	std::vector<std::string> expected;
+	expected.push_back( "ONE" );
+	expected.push_back( "012" );
+	expected.push_back( "123" );
+	expected.push_back( "FOUR" );
+	std::sort( tokens.begin(), tokens.end() );
+	std::sort( expected.begin(), expected.end() );
+	BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+	for ( int i = 0; i < expected.size() && i < tokens.size(); ++i ) {
+		BOOST_CHECK_EQUAL( expected.at( i ), tokens.at( i ) );
+	}
+}
+
+test_suite* get_suite() {
+	test_suite* test = BOOST_TEST_SUITE( "Tokenizer tests" );
+	test->add( BOOST_TEST_CASE( &simple ) );
+	test->add( BOOST_TEST_CASE( &with_newlines ) );
+	test->add( BOOST_TEST_CASE( &with_numbers ) );
+	return test;
+}
+
+}}} //namespaces
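For context, the file above only builds a suite via get_suite(); it has no entry point of its own. A minimal driver under the classic Boost.Test manual-registration API might look like the sketch below. The init_unit_test_suite() signature is Boost's documented hook for that API; the driver file itself and the top-level suite name are assumptions, not part of this commit.

	// Hypothetical driver (not part of this commit), assuming the classic
	// Boost.Test manual-init API, in which the framework calls a user-defined
	// init_unit_test_suite() to build the test tree before running it.
	#include <boost/test/unit_test.hpp>

	namespace indexlib { namespace tests { namespace tokenizer_test {
		// Defined in tokenizer-test.cpp above; builds "Tokenizer tests".
		boost::unit_test::test_suite* get_suite();
	}}}

	// Called once by the Boost.Test framework at startup.
	boost::unit_test::test_suite*
	init_unit_test_suite( int /*argc*/, char* /*argv*/[] ) {
		boost::unit_test::test_suite* top = BOOST_TEST_SUITE( "indexlib" );
		top->add( indexlib::tests::tokenizer_test::get_suite() );
		return top;
	}

Keeping per-file get_suite() factories and aggregating them in one driver lets indexlib's other test files (and suites like this one) be linked into a single test binary.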