Lucene example source code file: TestStressIndexing2.java

This Lucene source file (TestStressIndexing2.java) is included in the DevDaily.com
"Java Source Code Warehouse" project. The intent of that project is to help you
"Learn Java by Example" (TM).

The Lucene TestStressIndexing2.java source code:
package org.apache.lucene.index;
/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import junit.framework.Assert;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util._TestUtil;
/**
 * Stress test for IndexWriter's internal synchronization. Several
 * {@link IndexingThread}s concurrently add/update/delete documents against a
 * shared IndexWriter -- deliberately without any client-side locking -- then
 * the surviving documents are re-indexed serially into a second directory and
 * the two indexes are verified to be logically equivalent: stored fields,
 * postings (doc/freq pairs, after remapping doc ids) and term vectors.
 */
public class TestStressIndexing2 extends LuceneTestCase {
// Knobs shaping the random documents and index; testMultiConfig()
// re-randomizes several of these on every iteration.
static int maxFields=4; // max number of random "fN" fields per doc (the "id" field is always added)
static int bigFieldSize=10; // token count for the occasional large field
static boolean sameFieldOrder=false; // if true, fields are sorted by name before indexing
static int mergeFactor=3;
static int maxBufferedDocs=3;
static int seed=0; // bumped per multi-config iteration so per-thread RNG streams differ across runs
// IndexWriter subclass whose testPoint() hook randomly yields the current
// thread, perturbing scheduling at IndexWriter's internal test points to
// increase the chance of exposing races.
public class MockIndexWriter extends IndexWriter {
public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException {
super(dir, conf);
}
@Override
boolean testPoint(String name) {
// if (name.equals("startCommit")) {
// Yield ~25% of the time; always report success so indexing proceeds.
if (random.nextInt(4) == 2)
Thread.yield();
return true;
}
}
/**
 * Indexes randomly with multiple threads, then verifies a near-real-time
 * reader (IndexWriter.getReader) against the committed on-disk index.
 */
public void testRandomIWReader() throws Throwable {
Directory dir = newDirectory();
// TODO: verify equals using IW.getReader
DocsAndWriter dw = indexRandomIWReader(5, 3, 100, dir);
IndexReader reader = dw.writer.getReader();
dw.writer.commit();
verifyEquals(random, reader, dir, "id");
reader.close();
dw.writer.close();
dir.close();
}
/**
 * Indexes randomly with multiple threads into dir1, indexes the same
 * surviving documents serially into dir2, and verifies equivalence.
 */
public void testRandom() throws Throwable {
Directory dir1 = newDirectory();
Directory dir2 = newDirectory();
// mergeFactor=2; maxBufferedDocs=2; Map docs = indexRandom(1, 3, 2, dir1);
int maxThreadStates = 1+random.nextInt(10);
boolean doReaderPooling = random.nextBoolean();
Map<String,Document> docs = indexRandom(5, 3, 100, dir1, maxThreadStates, doReaderPooling);
indexSerial(random, docs, dir2);
// sanity self-comparisons, left here for debugging:
// verifyEquals(dir1, dir1, "id");
// verifyEquals(dir2, dir2, "id");
verifyEquals(dir1, dir2, "id");
dir1.close();
dir2.close();
}
/**
 * Runs the random-vs-serial comparison many times with small, randomized
 * writer configurations (merge factor, buffered docs, thread states, reader
 * pooling, field ordering) to cover more of the configuration space.
 */
public void testMultiConfig() throws Throwable {
// test lots of smaller different params together
int num = 3 * RANDOM_MULTIPLIER;
for (int i = 0; i < num; i++) { // increase iterations for better testing
if (VERBOSE) {
System.out.println("\n\nTEST: top iter=" + i);
}
sameFieldOrder=random.nextBoolean();
mergeFactor=random.nextInt(3)+2;
maxBufferedDocs=random.nextInt(3)+2;
int maxThreadStates = 1+random.nextInt(10);
boolean doReaderPooling = random.nextBoolean();
seed++;
int nThreads=random.nextInt(5)+1;
int iter=random.nextInt(5)+1;
int range=random.nextInt(20)+1;
Directory dir1 = newDirectory();
Directory dir2 = newDirectory();
if (VERBOSE) {
System.out.println("  nThreads=" + nThreads + " iter=" + iter + " range=" + range + " doPooling=" + doReaderPooling + " maxThreadStates=" + maxThreadStates + " sameFieldOrder=" + sameFieldOrder + " mergeFactor=" + mergeFactor);
}
Map<String,Document> docs = indexRandom(nThreads, iter, range, dir1, maxThreadStates, doReaderPooling);
if (VERBOSE) {
System.out.println("TEST: index serial");
}
indexSerial(random, docs, dir2);
if (VERBOSE) {
System.out.println("TEST: verify");
}
verifyEquals(dir1, dir2, "id");
dir1.close();
dir2.close();
}
}
// Template term for the unique document key field; per-doc terms are derived
// via idTerm.createTerm(idString).
static Term idTerm = new Term("id","");
IndexingThread[] threads;
// Orders fields by name so randomly-ordered documents can be compared
// field-by-field. NOTE(review): declared with a raw Comparator on the RHS,
// relying on an unchecked conversion -- pre-generics style kept as-is.
static Comparator<Fieldable> fieldNameComparator = new Comparator() {
public int compare(Fieldable o1, Fieldable o2) {
return o1.name().compareTo(o2.name());
}
};
// This test avoids using any extra synchronization in the multiple
// indexing threads to test that IndexWriter does correctly synchronize
// everything.
// Pair returned by indexRandomIWReader: the surviving id->Document map plus
// the still-open writer (caller is responsible for closing it).
public static class DocsAndWriter {
Map<String,Document> docs;
IndexWriter writer;
}
/**
 * Runs nThreads IndexingThreads against one MockIndexWriter and returns the
 * merged map of surviving documents together with the still-open writer.
 *
 * @param nThreads   number of concurrent indexing threads
 * @param iterations operations performed by each thread
 * @param range      id-value range per thread (ids may collide within a thread, forcing updates)
 * @param dir        directory to index into
 */
public DocsAndWriter indexRandomIWReader(int nThreads, int iterations, int range, Directory dir) throws IOException, InterruptedException {
Map<String,Document> docs = new HashMap();
IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE).setRAMBufferSizeMB(
0.1).setMaxBufferedDocs(maxBufferedDocs).setMergePolicy(newLogMergePolicy()));
w.setInfoStream(VERBOSE ? System.out : null);
w.commit();
setUseCompoundFile(w.getConfig().getMergePolicy(), false);
setMergeFactor(w.getConfig().getMergePolicy(), mergeFactor);
/***
w.setMaxMergeDocs(Integer.MAX_VALUE);
w.setMaxFieldLength(10000);
w.setRAMBufferSizeMB(1);
w.setMergeFactor(10);
***/
// Give each thread a disjoint id space (base = 1000000*i) so threads never
// update each other's documents.
threads = new IndexingThread[nThreads];
for (int i=0; i<threads.length; i++) {
IndexingThread th = new IndexingThread();
th.w = w;
th.base = 1000000*i;
th.range = range;
th.iterations = iterations;
threads[i] = th;
}
for (int i=0; i<threads.length; i++) {
threads[i].start();
}
for (int i=0; i<threads.length; i++) {
threads[i].join();
}
// w.optimize();
//w.close();
// Each thread synchronizes on itself at the end of run(); locking th here
// establishes visibility of its docs map before merging.
for (int i=0; i<threads.length; i++) {
IndexingThread th = threads[i];
synchronized(th) {
docs.putAll(th.docs);
}
}
_TestUtil.checkIndex(dir);
DocsAndWriter dw = new DocsAndWriter();
dw.docs = docs;
dw.writer = w;
return dw;
}
/**
 * Like indexRandomIWReader, but repeats the whole threaded run three times
 * (re-creating the index each time) and closes the writer after each pass.
 * Returns the merged id->Document map from the final state.
 */
public Map<String,Document> indexRandom(int nThreads, int iterations, int range, Directory dir, int maxThreadStates,
boolean doReaderPooling) throws IOException, InterruptedException {
Map<String,Document> docs = new HashMap();
for(int iter=0;iter<3;iter++) {
if (VERBOSE) {
System.out.println("TEST: iter=" + iter);
}
// OpenMode.CREATE wipes the previous pass; docs from earlier passes are
// re-accumulated below because each pass replays the same RNG-driven ops.
IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE)
.setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setMaxThreadStates(maxThreadStates)
.setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy()));
w.setInfoStream(VERBOSE ? System.out : null);
setUseCompoundFile(w.getConfig().getMergePolicy(), false);
setMergeFactor(w.getConfig().getMergePolicy(), mergeFactor);
threads = new IndexingThread[nThreads];
for (int i=0; i<threads.length; i++) {
IndexingThread th = new IndexingThread();
th.w = w;
th.base = 1000000*i;
th.range = range;
th.iterations = iterations;
threads[i] = th;
}
for (int i=0; i<threads.length; i++) {
threads[i].start();
}
for (int i=0; i<threads.length; i++) {
threads[i].join();
}
// w.optimize();
w.close();
// Sync on each finished thread before reading its docs map (visibility).
for (int i=0; i<threads.length; i++) {
IndexingThread th = threads[i];
synchronized(th) {
docs.putAll(th.docs);
}
}
}
_TestUtil.checkIndex(dir);
return docs;
}
/**
 * Indexes the given documents in a single thread, normalizing field order
 * so the result is comparable to the multi-threaded index.
 */
public static void indexSerial(Random random, Map<String,Document> docs, Directory dir) throws IOException {
IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMergePolicy(newLogMergePolicy()));
// index all docs in a single thread
Iterator<Document> iter = docs.values().iterator();
while (iter.hasNext()) {
Document d = iter.next();
ArrayList<Fieldable> fields = new ArrayList();
fields.addAll(d.getFields());
// put fields in same order each time
Collections.sort(fields, fieldNameComparator);
Document d1 = new Document();
d1.setBoost(d.getBoost());
for (int i=0; i<fields.size(); i++) {
d1.add(fields.get(i));
}
w.addDocument(d1);
// System.out.println("indexing "+d1);
}
w.close();
}
/** Verifies an already-open reader against a freshly opened reader on dir2. */
public static void verifyEquals(Random r, IndexReader r1, Directory dir2, String idField) throws Throwable {
IndexReader r2 = IndexReader.open(dir2);
verifyEquals(r1, r2, idField);
r2.close();
}
/** Opens read-only readers on both directories and verifies equivalence. */
public static void verifyEquals(Directory dir1, Directory dir2, String idField) throws Throwable {
IndexReader r1 = IndexReader.open(dir1, true);
IndexReader r2 = IndexReader.open(dir2, true);
verifyEquals(r1, r2, idField);
r1.close();
r2.close();
}
// Debug helper: dumps every doc (live or deleted) of every sub-reader.
private static void printDocs(IndexReader r) throws Throwable {
IndexReader[] subs = r.getSequentialSubReaders();
for(IndexReader sub : subs) {
System.out.println("  " + ((SegmentReader) sub).getSegmentInfo());
for(int docID=0;docID<sub.maxDoc();docID++) {
Document doc = sub.document(docID);
if (!sub.isDeleted(docID)) {
System.out.println("    docID=" + docID + " id:" + doc.get("id"));
} else {
System.out.println("    DEL docID=" + docID + " id:" + doc.get("id"));
}
}
}
}
/**
 * Core equivalence check: builds a doc-id mapping between the two readers
 * via the unique idField, then compares stored fields, term vectors, and
 * finally every term's postings (with r2 docs remapped into r1's id space
 * and sorted, since doc-id assignment differs between the indexes).
 */
public static void verifyEquals(IndexReader r1, IndexReader r2, String idField) throws Throwable {
if (VERBOSE) {
System.out.println("\nr1 docs:");
printDocs(r1);
System.out.println("\nr2 docs:");
printDocs(r2);
}
if (r1.numDocs() != r2.numDocs()) {
assert false: "r1.numDocs()=" + r1.numDocs() + " vs r2.numDocs()=" + r2.numDocs();
}
boolean hasDeletes = !(r1.maxDoc()==r2.maxDoc() && r1.numDocs()==r1.maxDoc());
int[] r2r1 = new int[r2.maxDoc()]; // r2 id to r1 id mapping
TermDocs termDocs1 = r1.termDocs();
TermDocs termDocs2 = r2.termDocs();
// create mapping from id2 space to id1 based on idField
// (field() comparison uses == because idField is interned, matching how
// Lucene interns Term fields)
idField = StringHelper.intern(idField);
TermEnum termEnum = r1.terms (new Term (idField, ""));
do {
Term term = termEnum.term();
if (term==null || term.field() != idField) break;
termDocs1.seek (termEnum);
if (!termDocs1.next()) {
// This doc is deleted and wasn't replaced
termDocs2.seek(termEnum);
assertFalse(termDocs2.next());
continue;
}
int id1 = termDocs1.doc();
assertFalse(termDocs1.next()); // id must be unique in r1
termDocs2.seek(termEnum);
assertTrue(termDocs2.next());
int id2 = termDocs2.doc();
assertFalse(termDocs2.next()); // ...and unique in r2
r2r1[id2] = id1;
// verify stored fields are equivalent
try {
verifyEquals(r1.document(id1), r2.document(id2));
} catch (Throwable t) {
System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term="+ term);
System.out.println("  d1=" + r1.document(id1));
System.out.println("  d2=" + r2.document(id2));
throw t;
}
try {
// verify term vectors are equivalent
verifyEquals(r1.getTermFreqVectors(id1), r2.getTermFreqVectors(id2));
} catch (Throwable e) {
System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
TermFreqVector[] tv1 = r1.getTermFreqVectors(id1);
System.out.println("  d1=" + tv1);
if (tv1 != null)
for(int i=0;i<tv1.length;i++)
System.out.println("    " + i + ": " + tv1[i]);
TermFreqVector[] tv2 = r2.getTermFreqVectors(id2);
System.out.println("  d2=" + tv2);
if (tv2 != null)
for(int i=0;i<tv2.length;i++)
System.out.println("    " + i + ": " + tv2[i]);
throw e;
}
} while (termEnum.next());
termEnum.close();
// Verify postings
TermEnum termEnum1 = r1.terms (new Term ("", ""));
TermEnum termEnum2 = r2.terms (new Term ("", ""));
// pack both doc and freq into single element for easy sorting
long[] info1 = new long[r1.numDocs()];
long[] info2 = new long[r2.numDocs()];
for(;;) {
Term term1,term2;
// iterate until we get some docs
int len1;
for(;;) {
len1=0;
term1 = termEnum1.term();
if (term1==null) break;
termDocs1.seek(termEnum1);
while (termDocs1.next()) {
int d1 = termDocs1.doc();
int f1 = termDocs1.freq();
info1[len1] = (((long)d1)<<32) | f1;
len1++;
}
if (len1>0) break;
if (!termEnum1.next()) break;
}
// iterate until we get some docs
int len2;
for(;;) {
len2=0;
term2 = termEnum2.term();
if (term2==null) break;
termDocs2.seek(termEnum2);
while (termDocs2.next()) {
int d2 = termDocs2.doc();
int f2 = termDocs2.freq();
// remap r2's doc id into r1's id space before packing
info2[len2] = (((long)r2r1[d2])<<32) | f2;
len2++;
}
if (len2>0) break;
if (!termEnum2.next()) break;
}
if (!hasDeletes)
assertEquals(termEnum1.docFreq(), termEnum2.docFreq());
assertEquals(len1, len2);
if (len1==0) break; // no more terms
assertEquals(term1, term2);
// sort info2 to get it into ascending docid
Arrays.sort(info2, 0, len2);
// now compare
for (int i=0; i<len1; i++) {
assertEquals(info1[i], info2[i]);
}
termEnum1.next();
termEnum2.next();
}
}
/** Asserts two documents carry the same fields (compared after name-sorting). */
public static void verifyEquals(Document d1, Document d2) {
List<Fieldable> ff1 = d1.getFields();
List<Fieldable> ff2 = d2.getFields();
Collections.sort(ff1, fieldNameComparator);
Collections.sort(ff2, fieldNameComparator);
assertEquals(ff1 + " : " + ff2, ff1.size(), ff2.size());
for (int i=0; i<ff1.size(); i++) {
Fieldable f1 = ff1.get(i);
Fieldable f2 = ff2.get(i);
if (f1.isBinary()) {
// binary payloads are only checked for kind, not content
assert(f2.isBinary());
} else {
String s1 = f1.stringValue();
String s2 = f2.stringValue();
assertEquals(ff1 + " : " + ff2, s1,s2);
}
}
}
/**
 * Asserts two term-vector arrays match: same terms and frequencies, and,
 * when positions are stored, same positions and (if present) offsets.
 */
public static void verifyEquals(TermFreqVector[] d1, TermFreqVector[] d2) {
if (d1 == null) {
assertTrue(d2 == null);
return;
}
assertTrue(d2 != null);
assertEquals(d1.length, d2.length);
for(int i=0;i<d1.length;i++) {
TermFreqVector v1 = d1[i];
TermFreqVector v2 = d2[i];
if (v1 == null || v2 == null)
System.out.println("v1=" + v1 + " v2=" + v2 + " i=" + i + " of " + d1.length);
assertEquals(v1.size(), v2.size());
int numTerms = v1.size();
String[] terms1 = v1.getTerms();
String[] terms2 = v2.getTerms();
int[] freq1 = v1.getTermFrequencies();
int[] freq2 = v2.getTermFrequencies();
for(int j=0;j<numTerms;j++) {
if (!terms1[j].equals(terms2[j]))
assertEquals(terms1[j], terms2[j]);
assertEquals(freq1[j], freq2[j]);
}
if (v1 instanceof TermPositionVector) {
assertTrue(v2 instanceof TermPositionVector);
TermPositionVector tpv1 = (TermPositionVector) v1;
TermPositionVector tpv2 = (TermPositionVector) v2;
for(int j=0;j<numTerms;j++) {
int[] pos1 = tpv1.getTermPositions(j);
int[] pos2 = tpv2.getTermPositions(j);
if (pos1 == null) {
assertNull(pos2);
} else {
assertNotNull(pos1);
assertNotNull(pos2);
assertEquals(pos1.length, pos2.length);
TermVectorOffsetInfo[] offsets1 = tpv1.getOffsets(j);
TermVectorOffsetInfo[] offsets2 = tpv2.getOffsets(j);
// offsets must be either present in both vectors or absent in both
if (offsets1 == null)
assertTrue(offsets2 == null);
else
assertTrue(offsets2 != null);
for(int k=0;k<pos1.length;k++) {
assertEquals(pos1[k], pos2[k]);
if (offsets1 != null) {
assertEquals(offsets1[k].getStartOffset(),
offsets2[k].getStartOffset());
assertEquals(offsets1[k].getEndOffset(),
offsets2[k].getEndOffset());
}
}
}
}
}
}
}
/**
 * Worker thread: performs `iterations` random operations (5% delete-by-term,
 * 5% delete-by-query, 90% add/update) against the shared writer, mirroring
 * each operation in its private `docs` map. The RNG is seeded deterministically
 * from base+range+seed so the surviving documents are reproducible.
 */
private class IndexingThread extends Thread {
IndexWriter w;
int base; // start of this thread's disjoint id space (1000000*threadIndex)
int range; // ids are drawn from [base, base+range)
int iterations;
Map<String,Document> docs = new HashMap(); // expected surviving docs; read by others under synchronized(this)
Random r;
public int nextInt(int lim) {
return r.nextInt(lim);
}
// start is inclusive and end is exclusive
public int nextInt(int start, int end) {
return start + r.nextInt(end-start);
}
char[] buffer = new char[100];
// Appends one random token (up to 19 chars, including surrogate pairs and
// deliberately illegal unpaired surrogates) plus a trailing space at
// buffer[start..]; grows the buffer as needed and returns the new end.
private int addUTF8Token(int start) {
final int end = start + nextInt(20);
if (buffer.length < 1+end) {
char[] newBuffer = new char[(int) ((1+end)*1.25)];
System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
buffer = newBuffer;
}
for(int i=start;i<end;i++) {
int t = nextInt(6);
if (0 == t && i < end-1) {
// Make a surrogate pair
// High surrogate
buffer[i++] = (char) nextInt(0xd800, 0xdc00);
// Low surrogate
buffer[i] = (char) nextInt(0xdc00, 0xe000);
} else if (t <= 1)
buffer[i] = (char) nextInt(0x80);
else if (2 == t)
buffer[i] = (char) nextInt(0x80, 0x800);
else if (3 == t)
buffer[i] = (char) nextInt(0x800, 0xd800);
else if (4 == t)
buffer[i] = (char) nextInt(0xe000, 0xffff);
else if (5 == t) {
// Illegal unpaired surrogate
if (r.nextBoolean())
buffer[i] = (char) nextInt(0xd800, 0xdc00);
else
buffer[i] = (char) nextInt(0xdc00, 0xe000);
}
}
buffer[end] = ' ';
return 1+end;
}
// Returns a whitespace-separated string of nTokens tokens (random count in
// [1,4] if nTokens==0); half the time the tokens are random UTF-8.
public String getString(int nTokens) {
nTokens = nTokens!=0 ? nTokens : r.nextInt(4)+1;
// Half the time make a random UTF8 string
if (r.nextBoolean())
return getUTF8String(nTokens);
// avoid StringBuffer because it adds extra synchronization.
char[] arr = new char[nTokens*2];
for (int i=0; i<nTokens; i++) {
arr[i*2] = (char)('A' + r.nextInt(10));
arr[i*2+1] = ' ';
}
return new String(arr);
}
public String getUTF8String(int nTokens) {
int upto = 0;
Arrays.fill(buffer, (char) 0);
for(int i=0;i<nTokens;i++)
upto = addUTF8Token(upto);
return new String(buffer, 0, upto);
}
public String getIdString() {
return Integer.toString(base + nextInt(range));
}
// Builds a document with a unique-per-index id field plus up to maxFields
// random fields (random store/index/term-vector options), then updateDocument
// so a colliding id replaces the older doc; mirrors the result in this.docs.
public void indexDoc() throws IOException {
Document d = new Document();
ArrayList<Field> fields = new ArrayList();
String idString = getIdString();
Field idField =  newField(idTerm.field(), idString, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
fields.add(idField);
int nFields = nextInt(maxFields);
for (int i=0; i<nFields; i++) {
Field.TermVector tvVal = Field.TermVector.NO;
switch (nextInt(4)) {
case 0:
tvVal = Field.TermVector.NO;
break;
case 1:
tvVal = Field.TermVector.YES;
break;
case 2:
tvVal = Field.TermVector.WITH_POSITIONS;
break;
case 3:
tvVal = Field.TermVector.WITH_POSITIONS_OFFSETS;
break;
}
switch (nextInt(4)) {
case 0:
fields.add(newField("f" + nextInt(100), getString(1), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, tvVal));
break;
case 1:
fields.add(newField("f" + nextInt(100), getString(0), Field.Store.NO, Field.Index.ANALYZED, tvVal));
break;
case 2:
fields.add(newField("f" + nextInt(100), getString(0), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
break;
case 3:
fields.add(newField("f" + nextInt(100), getString(bigFieldSize), Field.Store.YES, Field.Index.ANALYZED, tvVal));
break;
}
}
if (sameFieldOrder) {
Collections.sort(fields, fieldNameComparator);
} else {
// random placement of id field also
Collections.swap(fields,nextInt(fields.size()), 0);
}
for (int i=0; i<fields.size(); i++) {
d.add(fields.get(i));
}
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": indexing id:" + idString);
}
w.updateDocument(idTerm.createTerm(idString), d);
//System.out.println(Thread.currentThread().getName() + ": indexing "+d);
docs.put(idString, d);
}
public void deleteDoc() throws IOException {
String idString = getIdString();
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": del id:" + idString);
}
w.deleteDocuments(idTerm.createTerm(idString));
docs.remove(idString);
}
public void deleteByQuery() throws IOException {
String idString = getIdString();
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": del query id:" + idString);
}
w.deleteDocuments(new TermQuery(idTerm.createTerm(idString)));
docs.remove(idString);
}
@Override
public void run() {
try {
// Deterministic seed: replaying with the same base/range/seed reproduces
// the same operation stream (required for the serial re-index to match).
r = new Random(base+range+seed);
for (int i=0; i<iterations; i++) {
int what = nextInt(100);
if (what < 5) {
deleteDoc();
} else if (what < 10) {
deleteByQuery();
} else {
indexDoc();
}
}
} catch (Throwable e) {
e.printStackTrace();
Assert.fail(e.toString());
}
// Memory barrier: the parent thread synchronizes on this object before
// reading docs, so this publishes the final map contents.
synchronized (this) {
docs.size();
}
}
}
}
Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene TestStressIndexing2.java source code file: