diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/test/t_factors')
-rwxr-xr-x | debian/htdig/htdig-3.2.0b6/test/t_factors | 235 |
1 files changed, 235 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_factors b/debian/htdig/htdig-3.2.0b6/test/t_factors new file mode 100755 index 00000000..c1127077 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/t_factors @@ -0,0 +1,235 @@ +#!/bin/sh +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +# $Id: t_factors,v 1.7 2004/06/05 06:26:22 lha Exp $ +# + +# Tests (or should eventually test) the following config attributes: +# author_factor +# backlink_factor +# caps_factor +# date_factor (TODO) +# description_factor +# heading_factor +# keywords_factor +# meta_description_factor +# multimatch_factor +# search_results_order +# text_factor +# title_factor +# url_seed_score +# url_text_factor + +# try_order comment query pattern1 patern2 ... +# comment - description of test, displayed if error occurs +# query - search string passed to htsearch +# pattern - strings expected to occur *in order* in the output +try_order() { + comment="$1" + shift + query="$1" + shift + $htsearch -c $config "$query" > $tmp 2> /dev/null + array="" + for pattern + do + array="$array; array[i++] = "\"$pattern\" + done + miss=`$awk "BEGIN {$array; line = 0; } \ + "'$0'" ~ \".*\"array[line] { line++ } \ + END { print array[line] } " < $tmp ` + if [ "$miss" != "" ] + then + $htsearch -vv -c $config "$query" > /dev/null + echo "String \"$miss\" was not found where expected" + fail "$htsearch -c $config '$query' >> $tmp -- + $comment" + fi +} + + + + +test_functions_action=--start-apache +. ./test_functions + +config=$testdir/conf/htdig.conf.tmp +tmp=/tmp/t_htsearch$$ + +# set up config file with chosen non-default values +cp $testdir/conf/htdig.conf $config + +$htdig "$@" -t -i -c $config || fail "Couldn't dig" +$htpurge -c $config || fail "Couldn't purge" + +try_order "Search for 'also'" \ + "words=also" \ + '4 matches' 'site2.html' 'site4.html' 'bad_local.htm' 'script.html' + +set_attr url_seed_score "site4 *1000+1000" +try_order "Seed score 1000 for site4.html" \ + "words=also" \ + '4 matches' 'site4.html' 'site2.html' 'bad_local.htm' 'script.html' + +set_attr url_seed_score "site4 *1000+1000 script *1000+1000" +try_order "Seed score 1000 for site4.html and script.html" \ + "words=also" \ + '4 matches' 'site4.html' 'script.html' 'site2.html' 'bad_local.htm' + +set_attr url_seed_score "site4|script *1000+1000" +try_order "Seed score 1000 for site4|script" \ + "words=also" \ + '4 matches' 'site4.html' 'script.html' 'site2.html' 'bad_local.htm' + +set_attr search_results_order "bad_local" +try_order "Search_results_order bad_local" \ + "words=also" \ + '4 matches' 'bad_local.htm' 'site4.html' 'script.html' 'site2.html' + +set_attr search_results_order "script * e2|e4" +try_order "Search_results_order * script e2|e4" \ + "words=also" \ + '4 matches' 'script.html' 'bad_local.htm' 'site4.html' 'site2.html' + +set_attr url_seed_score "" +set_attr search_results_order "" +set_attr author_factor 0 +set_attr backlink_factor 0 +set_attr caps_factor 0 # not implemented +set_attr date_factor 0 # TODO +set_attr description_factor 0 +set_attr heading_factor 0 +set_attr keywords_factor 0 +set_attr meta_description_factor 0 +set_attr multimatch_factor 0 +set_attr text_factor 0 +set_attr title_factor 0 +set_attr url_text_factor 0 # not implemented + +try_order "Search with factors 0" \ + "words=also" \ + 'No matches' + +try_order "Search for 'service' with title_factor 0" \ + "words=service" \ + 'No matches' +set_attr title_factor 1 +try_order "Search for 'service' with title_factor 1" \ + "words=service" \ + '1 matches' 'script.html' +set_attr text_factor 0.3 +try_order "Greater weight to title factor" \ + "words=service" \ + '4 matches' 'script.html' 'site4.html' 'site%201.html' 'site3.html' +set_attr title_factor -3.2 +try_order "Checking negative title factor" \ + "words=service" \ + '4 matches' 'site4.html' 'site%201.html' 'site3.html' 'script.html' +set_attr title_factor 0 +set_attr text_factor 0 + +# test with all factors 0 except the one which matches + +set_attr description_factor 1 +try_order "Search for 'crossRef' with description_factor 1" \ + "words=crossRef" \ + '1 matches' 'site%201.html' +set_attr description_factor 0 + +set_attr author_factor 1 +try_order "Search for 'media' with author_factor 1" \ + "words=media" \ + '1 matches' 'script.html' +set_attr author_factor 0 + +set_attr meta_description_factor 1 +try_order "Search for 'stars' with meta_description_factor 1" \ + "words=stars" \ + '1 matches' 'site2.html' +set_attr meta_description_factor 0 + +set_attr heading_factor 1 +try_order "Search for 'obtain' with heading_factor 1" \ + "words=obtain" \ + '1 matches' 'bad_local.htm' +set_attr heading_factor 0 + +set_attr keywords_factor 1 +try_order "Search for 'newWord' with keywords_factor 1" \ + "words=newWord" \ + '1 matches' 'title.html' +set_attr keywords_factor 0 + + +# test with all document-based factors non-zero except the one which matches +set_attr author_factor 1 +#set_attr backlink_factor 1 # not document based +set_attr caps_factor 1 +#set_attr date_factor 1 # not document based +set_attr description_factor 1 +set_attr heading_factor 1 +set_attr keywords_factor 1 +set_attr meta_description_factor 1 +set_attr multimatch_factor 1 +set_attr text_factor 1 +set_attr title_factor 1 +set_attr url_text_factor 1 +set_attr description_factor 1 + +set_attr description_factor 0 +try_order "Search for 'crossRef' with description_factor 0" \ + "words=crossRef" \ + '1 matches' 'title.html' +set_attr description_factor 1 + +set_attr author_factor 0 +try_order "Search for 'media' with author_factor 0" \ + "words=media" \ + 'No matches' +set_attr author_factor 1 + +set_attr meta_description_factor 0 +try_order "Search for 'stars' with meta_description_factor 0" \ + "words=stars" \ + 'No matches' +set_attr meta_description_factor 1 + +set_attr heading_factor 0 +try_order "Search for 'obtain' with heading_factor 0" \ + "words=obtain" \ + 'No matches' +set_attr heading_factor 1 + +set_attr keywords_factor 0 +try_order "Search for 'newWord' with keywords_factor 0" \ + "words=newWord" \ + 'No matches' +set_attr keywords_factor 1 + +# multimatch_factor gives a "boost" to searches matching multiple terms +set_attr title_factor 10 # "get" in title of bad_local +set_attr multimatch_factor 10000 +try_order "Search for 'get or interest or repay' with multimatch_factor 10000" \ + "words=get+interest+repay;method=or" \ + '2 matches' 'site4.html' 'bad_local.htm' +set_attr multimatch_factor 0 +try_order "Search for 'get or interest or repay' with multimatch_factor 0" \ + "words=get+interest+repay;method=or" \ + '2 matches' 'bad_local.htm' 'site4.html' + +# backlink counts the number of references (of any type) to this document +set_attr backlink_factor 0 +try_order "site4.html has repay+interest, site 1.html only has suggestions" \ + "words=suggestions+repay+interest;method=or" \ + '2 matches' 'site4.html' 'site%201.html' +set_attr backlink_factor 100 +try_order "site 1.html has a higher ratio of backlinks to outgoing links" \ + "words=suggestions+repay+interest;method=or" \ + '2 matches' 'site%201.html' 'site4.html' + +test_functions_action=--stop-apache +. ./test_functions |