Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features.

BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdepim@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
author: toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> 2009-11-25 17:56:58 +0000
committer: toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> 2009-11-25 17:56:58 +0000
commit: 460c52653ab0dcca6f19a4f492ed2c5e4e963ab0 (patch)
tree: 67208f7c145782a7e90b123b982ca78d88cc2c87 /indexlib/tests/large-scale/generate.py
download: tdepim-460c52653ab0dcca6f19a4f492ed2c5e4e963ab0.tar.gz
tdepim-460c52653ab0dcca6f19a4f492ed2c5e4e963ab0.zip
1 files changed, 51 insertions, 0 deletions
diff --git a/indexlib/tests/large-scale/generate.py b/indexlib/tests/large-scale/generate.py
new file mode 100644
index 000000000..3a66df3be
--- /dev/null
+++ b/indexlib/tests/large-scale/generate.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+import random
+import re
+
+def init_chain(infile):
+	chain = {}
+	last = ('','')
+	for line in infile:
+		for word in line.split():
+			if not chain.has_key(last):
+				chain[last]=[]
+			chain[last].append(word)
+			last=(last[1],word)
+	chain[last]=None
+	return chain
+
+def output(chain,length,outputfile):
+	last = ('','')
+	start=2000
+	for i in range(length+start):
+		if chain[last] is None:
+			break
+		word = random.choice(chain[last])
+		last=(last[1],word)
+		if i > start:
+			outputfile.write(word)
+			outputfile.write(' ')
+	outputfile.write("\n")
+
+def get_words(chain,nwords,outputfile,scriptfile):
+	scriptfile.write("(for f in output/text_*; echo $f) > tmp/so_far\n")
+	for i in range(nwords):
+		word='1'
+		while re.compile("\d").search(word):
+			word=random.choice(random.choice(chain.keys()))
+		word=re.sub(r'\W','',word)
+		outputfile.write(word+"\n")
+		scriptfile.write("grep -i -E -e '(\W|^)%s' -l output/text_* >tmp/part_%s\n" % (word,word))
+		scriptfile.write("perl -e '($file1, $file2) = @ARGV; open F2, $file2; while (<F2>) {$h2{$_}++}; open F1, $file1; while (<F1>) {if ($h2{$_}) {print $_; $h2{$_} = 0;}}' tmp/part_%s tmp/so_far >tmp/so_far_\n" % word) # From scriptome
+		scriptfile.write("mv tmp/so_far_ tmp/so_far\n")
+		scriptfile.write("rm tmp/part_%s\n" % word)
+	scriptfile.write("mv tmp/so_far tmp/expected\n")
+		
+
+chain=init_chain(file("/dev/stdin"))
+for i in range(10000):
+	output(chain,2000,file("output/text_"+str(i+1),'w'))
+
+
+for i in range(1000):
+	get_words(chain,random.randint(1,5),file("output/words_%s.list"%str(i+1),'w'),file("output/words_%s.script"%str(i+1),'w'))
author	toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>	2009-11-25 17:56:58 +0000
committer	toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>	2009-11-25 17:56:58 +0000
commit	460c52653ab0dcca6f19a4f492ed2c5e4e963ab0 (patch)
tree	67208f7c145782a7e90b123b982ca78d88cc2c87 /indexlib/tests/large-scale/generate.py
download	tdepim-460c52653ab0dcca6f19a4f492ed2c5e4e963ab0.tar.gz tdepim-460c52653ab0dcca6f19a4f492ed2c5e4e963ab0.zip