Lucene example source code file (TestIndexWriter.java)
The Lucene TestIndexWriter.java source code:

package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Collections;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.SingleInstanceLockFactory;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.ThreadInterruptedException;

public class TestIndexWriter extends LuceneTestCase {

  public void testDocCount() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = null;
    IndexReader reader = null;
    int i;

    long savedWriteLockTimeout = IndexWriterConfig.getDefaultWriteLockTimeout();
    try {
      IndexWriterConfig.setDefaultWriteLockTimeout(2000);
      assertEquals(2000, IndexWriterConfig.getDefaultWriteLockTimeout());
      writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    } finally {
      IndexWriterConfig.setDefaultWriteLockTimeout(savedWriteLockTimeout);
    }

    // add 100 documents
    for (i = 0; i < 100; i++) {
      addDoc(writer);
    }
    assertEquals(100, writer.maxDoc());
    writer.close();

    // delete 40 documents
    reader = IndexReader.open(dir, false);
    for (i = 0; i < 40; i++) {
      reader.deleteDocument(i);
    }
    reader.close();

    reader = IndexReader.open(dir, true);
    assertEquals(60, reader.numDocs());
    reader.close();

    // optimize the index and check that the new doc count is correct
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    assertEquals(60, writer.numDocs());
    writer.optimize();
    assertEquals(60, writer.maxDoc());
    assertEquals(60, writer.numDocs());
    writer.close();

    // check that the index reader gives the same numbers.
    reader = IndexReader.open(dir, true);
    assertEquals(60, reader.maxDoc());
    assertEquals(60, reader.numDocs());
    reader.close();

    // make sure opening a new index for create over
    // this existing one works correctly:
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
    assertEquals(0, writer.maxDoc());
    assertEquals(0, writer.numDocs());
    writer.close();
    dir.close();
  }

  private void addDoc(IndexWriter writer) throws IOException {
    Document doc = new Document();
    doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
  }

  private void addDocWithIndex(IndexWriter writer, int index) throws IOException {
    Document doc = new Document();
    doc.add(newField("content", "aaa " + index, Field.Store.YES, Field.Index.ANALYZED));
    doc.add(newField("id", "" + index, Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
  }

  public static void assertNoUnreferencedFiles(Directory dir, String message) throws IOException {
    String[] startFiles = dir.listAll();
    SegmentInfos infos = new SegmentInfos();
    infos.read(dir);
    new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))).rollback();
    String[] endFiles = dir.listAll();

    Arrays.sort(startFiles);
    Arrays.sort(endFiles);

    if (!Arrays.equals(startFiles, endFiles)) {
      fail(message + ": before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles));
    }
  }

  private static class StringSplitAnalyzer extends Analyzer {
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new StringSplitTokenizer(reader);
    }
  }

  private static class StringSplitTokenizer extends Tokenizer {
    private final String[] tokens;
    private int upto = 0;
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    public StringSplitTokenizer(Reader r) {
      try {
        final StringBuilder b = new StringBuilder();
        final char[] buffer = new char[1024];
        int n;
        while ((n = r.read(buffer)) != -1) {
          b.append(buffer, 0, n);
        }
        tokens = b.toString().split(" ");
      } catch (IOException ioe) {
        throw new RuntimeException(ioe);
      }
    }

    @Override
    public final boolean incrementToken() throws IOException {
      clearAttributes();
      if (upto < tokens.length) {
        termAtt.setEmpty();
        termAtt.append(tokens[upto]);
        upto++;
        return true;
      } else {
        return false;
      }
    }
  }

  /**
   * Make sure we skip wicked long terms.
   */
  public void testWickedLongTerm() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));

    char[] chars = new char[DocumentsWriter.CHAR_BLOCK_SIZE-1];
    Arrays.fill(chars, 'x');
    Document doc = new Document();
    final String bigTerm = new String(chars);

    // Max length term is 16383, so this contents produces
    // a too-long term:
    String contents = "abc xyz x" + bigTerm + " another term";
    doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);

    // Make sure we can add another normal document
    doc = new Document();
    doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);

    // Make sure all terms < max size were indexed
    assertEquals(2, reader.docFreq(new Term("content", "abc")));
    assertEquals(1, reader.docFreq(new Term("content", "bbb")));
    assertEquals(1, reader.docFreq(new Term("content", "term")));
    assertEquals(1, reader.docFreq(new Term("content", "another")));

    // Make sure position is still incremented when
    // massive term is skipped:
    TermPositions tps = reader.termPositions(new Term("content", "another"));
    assertTrue(tps.next());
    assertEquals(1, tps.freq());
    assertEquals(3, tps.nextPosition());

    // Make sure the doc that has the massive term is in
    // the index:
    assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());

    reader.close();

    // Make sure we can add a document with exactly the
    // maximum length term, and search on that term:
    doc = new Document();
    doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED));
    StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
    sa.setMaxTokenLength(100000);
    writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
    writer.addDocument(doc);
    writer.close();

    reader = IndexReader.open(dir, true);
    assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
    reader.close();

    dir.close();
  }

  public void testOptimizeMaxNumSegments() throws IOException {
    MockDirectoryWrapper dir = newDirectory();

    final Document doc = new Document();
    doc.add(newField("content", "aaa", Field.Store.YES, Field.Index.ANALYZED));

    final int incrMin = TEST_NIGHTLY ? 15 : 40;
    for (int numDocs = 10; numDocs < 500; numDocs += _TestUtil.nextInt(random, incrMin, 5*incrMin)) {
      LogDocMergePolicy ldmp = new LogDocMergePolicy();
      ldmp.setMinMergeDocs(1);
      ldmp.setMergeFactor(5);
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(2).setMergePolicy(ldmp));
      for (int j = 0; j < numDocs; j++)
        writer.addDocument(doc);
      writer.close();

      SegmentInfos sis = new SegmentInfos();
      sis.read(dir);
      final int segCount = sis.size();

      ldmp = new LogDocMergePolicy();
      ldmp.setMergeFactor(5);
      writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(ldmp));
      writer.optimize(3);
      writer.close();

      sis = new SegmentInfos();
      sis.read(dir);
      final int optSegCount = sis.size();

      if (segCount < 3)
        assertEquals(segCount, optSegCount);
      else
        assertEquals(3, optSegCount);
    }
    dir.close();
  }

  public void testOptimizeMaxNumSegments2() throws IOException {
    MockDirectoryWrapper dir = newDirectory();

    final Document doc = new Document();
    doc.add(newField("content", "aaa", Field.Store.YES, Field.Index.ANALYZED));

    LogDocMergePolicy ldmp = new LogDocMergePolicy();
    ldmp.setMinMergeDocs(1);
    ldmp.setMergeFactor(4);
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2).setMergePolicy(ldmp).setMergeScheduler(new ConcurrentMergeScheduler()));

    for (int iter = 0; iter < 10; iter++) {
      for (int i = 0; i < 19; i++)
        writer.addDocument(doc);

      writer.commit();
      writer.waitForMerges();
      writer.commit();

      SegmentInfos sis = new SegmentInfos();
      sis.read(dir);
      final int segCount = sis.size();

      writer.optimize(7);
      writer.commit();
      writer.waitForMerges();

      sis = new SegmentInfos();
      sis.read(dir);
      final int optSegCount = sis.size();

      if (segCount < 7)
        assertEquals(segCount, optSegCount);
      else
        assertEquals(7, optSegCount);
    }
    writer.close();
    dir.close();
  }

  /**
   * Make sure optimize doesn't use any more than 1X
   * starting index size as its temporary free space
   * required.
   */
  public void testOptimizeTempSpaceUsage() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10).setMergePolicy(newLogMergePolicy()));
    if (VERBOSE) {
      System.out.println("TEST: config1=" + writer.getConfig());
    }

    for (int j = 0; j < 500; j++) {
      addDocWithIndex(writer, j);
    }
    final int termIndexInterval = writer.getConfig().getTermIndexInterval();
    // force one extra segment w/ different doc store so
    // we see the doc stores get merged
    writer.commit();
    addDocWithIndex(writer, 500);
    writer.close();

    if (VERBOSE) {
      System.out.println("TEST: start disk usage");
    }
    long startDiskUsage = 0;
    String[] files = dir.listAll();
    for (int i = 0; i < files.length; i++) {
      startDiskUsage += dir.fileLength(files[i]);
      if (VERBOSE) {
        System.out.println(files[i] + ": " + dir.fileLength(files[i]));
      }
    }

    dir.resetMaxUsedSizeInBytes();
    dir.setTrackDiskUsage(true);

    // Important to use same term index interval else a
    // smaller one here could increase the disk usage and
    // cause a false failure:
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setTermIndexInterval(termIndexInterval).setMergePolicy(newLogMergePolicy()));
    writer.setInfoStream(VERBOSE ? System.out : null);
    writer.optimize();
    writer.close();
    long maxDiskUsage = dir.getMaxUsedSizeInBytes();
    assertTrue("optimize used too much temporary space: starting usage was " + startDiskUsage + " bytes; max temp usage was " + maxDiskUsage + " but should have been " + (4*startDiskUsage) + " (= 4X starting usage)", maxDiskUsage <= 4*startDiskUsage);
    dir.close();
  }

  static String arrayToString(String[] l) {
    String s = "";
    for (int i = 0; i < l.length; i++) {
      if (i > 0) {
        s += "\n ";
      }
      s += l[i];
    }
    return s;
  }

  // Make sure we can open an index for create even when a
  // reader holds it open (this fails pre lock-less
  // commits on windows):
  public void testCreateWithReader() throws IOException {
    Directory dir = newDirectory();

    // add one document & close writer
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    addDoc(writer);
    writer.close();

    // now open reader:
    IndexReader reader = IndexReader.open(dir, true);
    assertEquals("should be one document", reader.numDocs(), 1);

    // now open index for create:
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
    assertEquals("should be zero documents", writer.maxDoc(), 0);
    addDoc(writer);
    writer.close();

    assertEquals("should be one document", reader.numDocs(), 1);
    IndexReader reader2 = IndexReader.open(dir, true);
    assertEquals("should be one document", reader2.numDocs(), 1);
    reader.close();
    reader2.close();
    dir.close();
  }

  public void testChangesAfterClose() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = null;

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    addDoc(writer);

    // close
    writer.close();
    try {
      addDoc(writer);
      fail("did not hit AlreadyClosedException");
    } catch (AlreadyClosedException e) {
      // expected
    }
    dir.close();
  }

  /*
   * Simple test for "commit on close": open writer then
   * add a bunch of docs, making sure reader does not see
   * these docs until writer is closed.
   */
  public void testCommitOnClose() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    for (int i = 0; i < 14; i++) {
      addDoc(writer);
    }
    writer.close();

    Term searchTerm = new Term("content", "aaa");
    IndexSearcher searcher = new IndexSearcher(dir, false);
    ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals("first number of hits", 14, hits.length);
    searcher.close();

    IndexReader reader = IndexReader.open(dir, true);

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 11; j++) {
        addDoc(writer);
      }
      searcher = new IndexSearcher(dir, false);
      hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
      assertEquals("reader incorrectly sees changes from writer", 14, hits.length);
      searcher.close();
      assertTrue("reader should have still been current", reader.isCurrent());
    }

    // Now, close the writer:
    writer.close();
    assertFalse("reader should not be current now", reader.isCurrent());

    searcher = new IndexSearcher(dir, false);
    hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals("reader did not see changes after writer was closed", 47, hits.length);
    searcher.close();
    reader.close();
    dir.close();
  }

  /*
   * Simple test for "commit on close": open writer, then
   * add a bunch of docs, making sure reader does not see
   * them until writer has closed.  Then instead of
   * closing the writer, call abort and verify reader sees
   * nothing was added.  Then verify we can open the index
   * and add docs to it.
   */
  public void testCommitOnCloseAbort() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10));
    for (int i = 0; i < 14; i++) {
      addDoc(writer);
    }
    writer.close();

    Term searchTerm = new Term("content", "aaa");
    IndexSearcher searcher = new IndexSearcher(dir, false);
    ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals("first number of hits", 14, hits.length);
    searcher.close();

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(10));
    for (int j = 0; j < 17; j++) {
      addDoc(writer);
    }
    // Delete all docs:
    writer.deleteDocuments(searchTerm);

    searcher = new IndexSearcher(dir, false);
    hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals("reader incorrectly sees changes from writer", 14, hits.length);
    searcher.close();

    // Now, close the writer:
    writer.rollback();

    assertNoUnreferencedFiles(dir, "unreferenced files remain after rollback()");

    searcher = new IndexSearcher(dir, false);
    hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals("saw changes after writer.abort", 14, hits.length);
    searcher.close();

    // Now make sure we can re-open the index, add docs,
    // and all is good:
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(10));

    // On abort, writer in fact may write to the same
    // segments_N file:
    dir.setPreventDoubleWrite(false);

    for (int i = 0; i < 12; i++) {
      for (int j = 0; j < 17; j++) {
        addDoc(writer);
      }
      searcher = new IndexSearcher(dir, false);
      hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
      assertEquals("reader incorrectly sees changes from writer", 14, hits.length);
      searcher.close();
    }

    writer.close();
    searcher = new IndexSearcher(dir, false);
    hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals("didn't see changes after close", 218, hits.length);
    searcher.close();

    dir.close();
  }

  /*
   * Verify that a writer with "commit on close" indeed
   * cleans up the temp segments created after opening
   * that are not referenced by the starting segments
   * file.  We check this by using MockDirectoryWrapper to
   * measure max temp disk space used.
   */
  public void testCommitOnCloseDiskUsage() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    Analyzer analyzer;
    if (random.nextBoolean()) {
      // no payloads
      analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
    } else {
      // fixed length payloads
      final int length = random.nextInt(200);
      analyzer = new Analyzer() {
        @Override
        public TokenStream tokenStream(String fieldName, Reader reader) {
          return new MockFixedLengthPayloadFilter(random, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), length);
        }
      };
    }

    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
            setMaxBufferedDocs(10).
            setReaderPooling(false).
            setMergePolicy(newLogMergePolicy(10))
    );
    for (int j = 0; j < 30; j++) {
      addDocWithIndex(writer, j);
    }
    writer.close();
    dir.resetMaxUsedSizeInBytes();

    dir.setTrackDiskUsage(true);
    long startDiskUsage = dir.getMaxUsedSizeInBytes();
    writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
            setOpenMode(OpenMode.APPEND).
            setMaxBufferedDocs(10).
            setMergeScheduler(new SerialMergeScheduler()).
            setReaderPooling(false).
            setMergePolicy(newLogMergePolicy(10))
    );
    for (int j = 0; j < 1470; j++) {
      addDocWithIndex(writer, j);
    }
    long midDiskUsage = dir.getMaxUsedSizeInBytes();
    dir.resetMaxUsedSizeInBytes();
    writer.optimize();
    writer.close();

    IndexReader.open(dir, true).close();

    long endDiskUsage = dir.getMaxUsedSizeInBytes();

    // Ending index is 50X as large as starting index; due
    // to 3X disk usage normally we allow 150X max
    // transient usage.  If something is wrong w/ deleter
    // and it doesn't delete intermediate segments then it
    // will exceed this 150X:
    // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + ";end " + endDiskUsage);
    assertTrue("writer used too much space while adding documents: mid=" + midDiskUsage + " start=" + startDiskUsage + " end=" + endDiskUsage + " max=" + (startDiskUsage*150), midDiskUsage < 150*startDiskUsage);
    assertTrue("writer used too much space after close: endDiskUsage=" + endDiskUsage + " startDiskUsage=" + startDiskUsage + " max=" + (startDiskUsage*150), endDiskUsage < 150*startDiskUsage);
    dir.close();
  }

  /*
   * Verify that calling optimize when writer is open for
   * "commit on close" works correctly both for rollback()
   * and close().
   */
  public void testCommitOnCloseOptimize() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    // Must disable throwing exc on double-write: this
    // test uses IW.rollback which easily results in
    // writing to same file more than once
    dir.setPreventDoubleWrite(false);
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
            setMaxBufferedDocs(10).
            setMergePolicy(newLogMergePolicy(10))
    );
    for (int j = 0; j < 17; j++) {
      addDocWithIndex(writer, j);
    }
    writer.close();

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
    writer.optimize();

    if (VERBOSE) {
      writer.setInfoStream(System.out);
    }

    // Open a reader before closing (committing) the writer:
    IndexReader reader = IndexReader.open(dir, true);

    // Reader should see index as unoptimized at this
    // point:
    assertFalse("Reader incorrectly sees that the index is optimized", reader.isOptimized());
    reader.close();

    // Abort the writer:
    writer.rollback();
    assertNoUnreferencedFiles(dir, "aborted writer after optimize");

    // Open a reader after aborting writer:
    reader = IndexReader.open(dir, true);

    // Reader should still see index as unoptimized:
    assertFalse("Reader incorrectly sees that the index is optimized", reader.isOptimized());
    reader.close();

    if (VERBOSE) {
      System.out.println("TEST: do real optimize");
    }
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
    if (VERBOSE) {
      writer.setInfoStream(System.out);
    }
    writer.optimize();
    writer.close();

    if (VERBOSE) {
      System.out.println("TEST: writer closed");
    }
    assertNoUnreferencedFiles(dir, "aborted writer after optimize");

    // Open a reader after aborting writer:
    reader = IndexReader.open(dir, true);

    // Reader should still see index as unoptimized:
    assertTrue("Reader incorrectly sees that the index is unoptimized", reader.isOptimized());
    reader.close();
    dir.close();
  }

  public void testIndexNoDocuments() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer.commit();
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(0, reader.maxDoc());
    assertEquals(0, reader.numDocs());
    reader.close();

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
    writer.commit();
    writer.close();

    reader = IndexReader.open(dir, true);
    assertEquals(0, reader.maxDoc());
    assertEquals(0, reader.numDocs());
    reader.close();
    dir.close();
  }

  public void testManyFields() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10));
    for (int j = 0; j < 100; j++) {
      Document doc = new Document();
      doc.add(newField("a"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("b"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("c"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("d"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("e"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("f"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
    }
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(100, reader.maxDoc());
    assertEquals(100, reader.numDocs());
    for (int j = 0; j < 100; j++) {
      assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j)));
      assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j)));
      assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j)));
      assertEquals(1, reader.docFreq(new Term("d"+j, "aaa")));
      assertEquals(1, reader.docFreq(new Term("e"+j, "aaa")));
      assertEquals(1, reader.docFreq(new Term("f"+j, "aaa")));
    }
    reader.close();
    dir.close();
  }

  public void testSmallRAMBuffer() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
            setRAMBufferSizeMB(0.000001).
            setMergePolicy(newLogMergePolicy(10))
    );
    int lastNumFile = dir.listAll().length;
    for (int j = 0; j < 9; j++) {
      Document doc = new Document();
      doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
      int numFile = dir.listAll().length;
      // Verify that with a tiny RAM buffer we see new
      // segment after every doc
      assertTrue(numFile > lastNumFile);
      lastNumFile = numFile;
    }
    writer.close();
    dir.close();
  }

  /**
   * Make sure it's OK to change RAM buffer size and
   * maxBufferedDocs in a write session.
   *
   * @deprecated after all the setters on IW go away (4.0), this test can be
   *             removed because changing ram buffer settings during a write
   *             session won't be possible.
   */
  @Deprecated
  public void testChangingRAMBuffer() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH));

    int lastFlushCount = -1;
    for (int j = 1; j < 52; j++) {
      Document doc = new Document();
      doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
      _TestUtil.syncConcurrentMerges(writer);
      int flushCount = writer.getFlushCount();
      if (j == 1)
        lastFlushCount = flushCount;
      else if (j < 10)
        // No new files should be created
        assertEquals(flushCount, lastFlushCount);
      else if (10 == j) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (j < 20) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (20 == j) {
        writer.setRAMBufferSizeMB(16);
        writer.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 30) {
        assertEquals(flushCount, lastFlushCount);
      } else if (30 == j) {
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (j < 40) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (40 == j) {
        writer.setMaxBufferedDocs(10);
        writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 50) {
        assertEquals(flushCount, lastFlushCount);
        writer.setMaxBufferedDocs(10);
        writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(flushCount > lastFlushCount);
      }
    }
    writer.close();
    dir.close();
  }

  /**
   * @deprecated after setters on IW go away, this test can be deleted because
   *             changing those settings on IW won't be possible.
   */
  @Deprecated
  public void testChangingRAMBuffer2() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(10).setMaxBufferedDeleteTerms(10).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH));

    for (int j = 1; j < 52; j++) {
      Document doc = new Document();
      doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
    }

    int lastFlushCount = -1;
    for (int j = 1; j < 52; j++) {
      writer.deleteDocuments(new Term("field", "aaa" + j));
      _TestUtil.syncConcurrentMerges(writer);
      int flushCount = writer.getFlushCount();
      if (j == 1)
        lastFlushCount = flushCount;
      else if (j < 10) {
        // No new files should be created
        assertEquals(flushCount, lastFlushCount);
      } else if (10 == j) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDeleteTerms(1);
      } else if (j < 20) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (20 == j) {
        writer.setRAMBufferSizeMB(16);
        writer.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 30) {
        assertEquals(flushCount, lastFlushCount);
      } else if (30 == j) {
        writer.setRAMBufferSizeMB(0.000001);
        writer.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        writer.setMaxBufferedDeleteTerms(1);
      } else if (j < 40) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (40 == j) {
        writer.setMaxBufferedDeleteTerms(10);
        writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 50) {
        assertEquals(flushCount, lastFlushCount);
        writer.setMaxBufferedDeleteTerms(10);
        writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(flushCount > lastFlushCount);
      }
    }
    writer.close();
    dir.close();
  }

  // Make sure it's OK to change RAM buffer size and
  // maxBufferedDocs in a write session, using IW.getConfig()
  public void testChangingRAMBufferWithIWC() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer.getConfig().setMaxBufferedDocs(10);
    writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);

    int lastFlushCount = -1;
    for (int j = 1; j < 52; j++) {
      Document doc = new Document();
      doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
      _TestUtil.syncConcurrentMerges(writer);
      int flushCount = writer.getFlushCount();
      if (j == 1)
        lastFlushCount = flushCount;
      else if (j < 10)
        // No new files should be created
        assertEquals(flushCount, lastFlushCount);
      else if (10 == j) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
        writer.getConfig().setRAMBufferSizeMB(0.000001);
        writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (j < 20) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (20 == j) {
        writer.getConfig().setRAMBufferSizeMB(16);
        writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 30) {
        assertEquals(flushCount, lastFlushCount);
      } else if (30 == j) {
        writer.getConfig().setRAMBufferSizeMB(0.000001);
        writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (j < 40) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (40 == j) {
        writer.getConfig().setMaxBufferedDocs(10);
        writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 50) {
        assertEquals(flushCount, lastFlushCount);
        writer.getConfig().setMaxBufferedDocs(10);
        writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(flushCount > lastFlushCount);
      }
    }
    writer.close();
    dir.close();
  }

  public void testChangingRAMBuffer2WithIWC() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer.getConfig().setMaxBufferedDocs(10);
    writer.getConfig().setMaxBufferedDeleteTerms(10);
    writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);

    for (int j = 1; j < 52; j++) {
      Document doc = new Document();
      doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
    }

    int lastFlushCount = -1;
    for (int j = 1; j < 52; j++) {
      writer.deleteDocuments(new Term("field", "aaa" + j));
      _TestUtil.syncConcurrentMerges(writer);
      int flushCount = writer.getFlushCount();
      if (j == 1)
        lastFlushCount = flushCount;
      else if (j < 10) {
        // No new files should be created
        assertEquals(flushCount, lastFlushCount);
      } else if (10 == j) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
        writer.getConfig().setRAMBufferSizeMB(0.000001);
        writer.getConfig().setMaxBufferedDeleteTerms(1);
      } else if (j < 20) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (20 == j) {
        writer.getConfig().setRAMBufferSizeMB(16);
        writer.getConfig().setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 30) {
        assertEquals(flushCount, lastFlushCount);
      } else if (30 == j) {
        writer.getConfig().setRAMBufferSizeMB(0.000001);
        writer.getConfig().setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        writer.getConfig().setMaxBufferedDeleteTerms(1);
      } else if (j < 40) {
        assertTrue(flushCount > lastFlushCount);
        lastFlushCount = flushCount;
      } else if (40 == j) {
        writer.getConfig().setMaxBufferedDeleteTerms(10);
        writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        lastFlushCount = flushCount;
      } else if (j < 50) {
        assertEquals(flushCount, lastFlushCount);
        writer.getConfig().setMaxBufferedDeleteTerms(10);
        writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
      } else if (50 == j) {
        assertTrue(flushCount > lastFlushCount);
      }
    }
    writer.close();
    dir.close();
  }

  public void testDiverseDocs() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.5));
    for (int i = 0; i < 3; i++) {
      // First, docs where every term is unique (heavy on
      // Posting instances)
      for (int j = 0; j < 100; j++) {
        Document doc = new Document();
        for (int k = 0; k < 100; k++) {
          doc.add(newField("field", Integer.toString(random.nextInt()), Field.Store.YES, Field.Index.ANALYZED));
        }
        writer.addDocument(doc);
      }

      // Next, many single term docs where only one term
      // occurs (heavy on byte blocks)
      for (int j = 0; j < 100; j++) {
        Document doc = new Document();
        doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
      }

      // Next, many single term docs where only one term
      // occurs but the terms are very long (heavy on
      // char[] arrays)
      for (int j = 0; j < 100; j++) {
        StringBuilder b = new StringBuilder();
        String x = Integer.toString(j) + ".";
        for (int k = 0; k < 1000; k++)
          b.append(x);
        String longTerm = b.toString();

        Document doc = new Document();
        doc.add(newField("field", longTerm, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
      }
    }
    writer.close();

    IndexSearcher searcher = new IndexSearcher(dir, false);
    ScoreDoc[] hits = searcher.search(new TermQuery(new Term("field", "aaa")), null, 1000).scoreDocs;
    assertEquals(300, hits.length);
    searcher.close();

    dir.close();
  }

  public void testEnablingNorms() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10));
    // Enable norms for only 1 doc, pre flush
    for (int j = 0; j < 10; j++) {
      Document doc = new Document();
      Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED);
      if (j != 8) {
        f.setOmitNorms(true);
      }
      doc.add(f);
      writer.addDocument(doc);
    }
    writer.close();

    Term searchTerm = new Term("field", "aaa");

    IndexSearcher searcher = new IndexSearcher(dir, false);
    ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals(10, hits.length);
    searcher.close();

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10));
    // Enable norms for only 1 doc, post flush
    for (int j = 0; j < 27; j++) {
      Document doc = new Document();
      Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED);
      if (j != 26) {
        f.setOmitNorms(true);
      }
      doc.add(f);
      writer.addDocument(doc);
    }
    writer.close();

    searcher = new IndexSearcher(dir, false);
    hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals(27, hits.length);
    searcher.close();

    IndexReader reader = IndexReader.open(dir, true);
    reader.close();

    dir.close();
  }

  public void testHighFreqTerm() throws IOException {
    MockDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01));
    // Massive doc that has 128 K a's
    StringBuilder b = new StringBuilder(1024*1024);
    for (int i = 0; i < 4096; i++) {
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
    }
    Document doc = new Document();
    doc.add(newField("field", b.toString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(1, reader.maxDoc());
    assertEquals(1, reader.numDocs());
    Term t = new Term("field", "a");
    assertEquals(1, reader.docFreq(t));
    TermDocs td = reader.termDocs(t);
    td.next();
    assertEquals(128*1024, td.freq());
    reader.close();
    dir.close();
  }

  // Make sure that a Directory implementation that does
  // not use LockFactory at all (ie overrides makeLock and
  // implements its own private locking) works OK.  This
  // was raised on java-dev as loss of backwards
  // compatibility.
  public void testNullLockFactory() throws IOException {

    final class MyRAMDirectory extends MockDirectoryWrapper {
      private LockFactory myLockFactory;
      MyRAMDirectory(Directory delegate) {
        super(random, delegate);
        lockFactory = null;
        myLockFactory = new SingleInstanceLockFactory();
      }
      @Override
      public Lock makeLock(String name) {
        return myLockFactory.makeLock(name);
      }
    }

    Directory dir = new MyRAMDirectory(new RAMDirectory());
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    for (int i = 0; i < 100; i++) {
      addDoc(writer);
    }
    writer.close();
    Term searchTerm = new Term("content", "aaa");
    IndexSearcher searcher = new IndexSearcher(dir, false);
    ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
    assertEquals("did not get right number of hits", 100, hits.length);
    searcher.close();

    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
    writer.close();
    searcher.close();
    dir.close();
  }

  public void testFlushWithNoMerging() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
            setMaxBufferedDocs(2).
            setMergePolicy(newLogMergePolicy(10))
    );
    Document doc = new Document();
    doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    for (int i = 0; i < 19; i++)
      writer.addDocument(doc);
    writer.flush(false, true);
    writer.close();
    SegmentInfos sis = new SegmentInfos();
    sis.read(dir);
    // Since we flushed w/o allowing merging we should now
    // have 10 segments
    assertEquals(10, sis.size());
    dir.close();
  }

  // Make sure we can flush segment w/ norms, then add
  // empty doc (no norms) and flush
  public void testEmptyDocAfterFlushingRealDoc() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    writer.setInfoStream(VERBOSE ? System.out : null);
    Document doc = new Document();
    doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.commit();
    if (VERBOSE) {
      System.out.println("\nTEST: now add empty doc");
    }
    writer.addDocument(new Document());
    writer.close();
    _TestUtil.checkIndex(dir);
    IndexReader reader = IndexReader.open(dir, true);
    assertEquals(2, reader.numDocs());
    reader.close();
    dir.close();
  }

  // Test calling optimize(false) whereby optimize is kicked
  // off but we don't wait for it to finish (but
  // writer.close() does wait)
  public void testBackgroundOptimize() throws IOException {
    Directory dir = newDirectory();
    for (int pass = 0; pass < 2; pass++) {
      IndexWriter writer = new IndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
              setOpenMode(OpenMode.CREATE).
              setMaxBufferedDocs(2).
              setMergePolicy(newLogMergePolicy(51))
      );
      Document doc = new Document();
      doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
      for (int i = 0; i < 100; i++)
        writer.addDocument(doc);
      writer.optimize(false);

      if (0 == pass) {
        writer.close();
        IndexReader reader = IndexReader.open(dir, true);
        assertTrue(reader.isOptimized());
        reader.close();
      } else {
        // Get another segment to flush so we can verify it is
        // NOT included in the optimization
        writer.addDocument(doc);
        writer.addDocument(doc);
        writer.close();

        IndexReader reader = IndexReader.open(dir, true);
        assertTrue(!reader.isOptimized());
        reader.close();

        SegmentInfos infos = new SegmentInfos();
        infos.read(dir);
        assertEquals(2, infos.size());
      }
    }
    dir.close();
  }

  /**
   * Test that no NullPointerException will be raised,
   * when adding one document with a single, empty field
   * and term vectors enabled.
   * @throws IOException
   */
  public void testBadSegment() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));

    Document document = new Document();
    document.add(newField("tvtest", "", Store.NO, Index.ANALYZED, TermVector.YES));
    iw.addDocument(document);
    iw.close();
    dir.close();
  }

  // LUCENE-1036
  public void testMaxThreadPriority() throws IOException {
    int pri = Thread.currentThread().getPriority();
    try {
      Directory dir = newDirectory();
      IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy());
      ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2);
      IndexWriter iw = new IndexWriter(dir, conf);
      Document document = new Document();
      document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
      Thread.currentThread().setPriority(Thread.MAX_PRIORITY);
      for (int i = 0; i < 4; i++)
        iw.addDocument(document);
      iw.close();
      dir.close();
    } finally {
      Thread.currentThread().setPriority(pri);
    }
  }

  // Just intercepts all merges & verifies that we are never
  // merging a segment with >= 20 (maxMergeDocs) docs
  private class MyMergeScheduler extends MergeScheduler {
    @Override
    synchronized public void merge(IndexWriter writer) throws CorruptIndexException, IOException {
      while (true) {
        MergePolicy.OneMerge merge = writer.getNextMerge();
        if (merge == null) {
          break;
        }
        for (int i = 0; i < merge.segments.size(); i++) {
          assert merge.segments.get(i).docCount < 20;
        }
        writer.merge(merge);
      }
    }

    @Override
    public void close() {}
  }

  // LUCENE-1013
  public void testSetMaxMergeDocs() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergeScheduler(new MyMergeScheduler()).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy());
    LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
    lmp.setMaxMergeDocs(20);
    lmp.setMergeFactor(2);
    IndexWriter iw = new IndexWriter(dir, conf);
    iw.setInfoStream(VERBOSE ? System.out : null);
    Document document = new Document();
    document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
    for (int i = 0; i < 177; i++)
      iw.addDocument(document);
    iw.close();
    dir.close();
  }

  public void testVariableSchema() throws Exception {
    Directory dir = newDirectory();
    int delID = 0;
    for (int i = 0; i < 20; i++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + i);
      }
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()));
      writer.setInfoStream(VERBOSE ? System.out : null);
      //LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
      //lmp.setMergeFactor(2);
      //lmp.setUseCompoundFile(false);
      Document doc = new Document();
      String contents = "aa bb cc dd ee ff gg hh ii jj kk";

      if (i == 7) {
        // Add empty docs here
        doc.add(newField("content3", "", Field.Store.NO, Field.Index.ANALYZED));
      } else {
        Field.Store storeVal;
        if (i%2 == 0) {
          doc.add(newField("content4", contents, Field.Store.YES, Field.Index.ANALYZED));
          storeVal = Field.Store.YES;
        } else
          storeVal = Field.Store.NO;
        doc.add(newField("content1", contents, storeVal, Field.Index.ANALYZED));
        doc.add(newField("content3", "", Field.Store.YES, Field.Index.ANALYZED));
        doc.add(newField("content5", "", storeVal, Field.Index.ANALYZED));
      }

      for (int j = 0; j < 4; j++)
        writer.addDocument(doc);

      writer.close();
      IndexReader reader = IndexReader.open(dir, false);
      reader.deleteDocument(delID++);
      reader.close();

      if (0 == i % 4) {
        writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
        //LogMergePolicy lmp2 = (LogMergePolicy) writer.getConfig().getMergePolicy();
        //lmp2.setUseCompoundFile(false);
        writer.optimize();
        writer.close();
      }
    }
    dir.close();
  }

  public void testNoWaitClose() throws Throwable {
    Directory directory = newDirectory();

    final Document doc = new Document();
    Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.add(idField);

    for (int pass = 0; pass < 2; pass++) {
      if (VERBOSE) {
        System.out.println("TEST: pass=" + pass);
      }

      IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy());
      if (pass == 2) {
        conf.setMergeScheduler(new SerialMergeScheduler());
      }
      IndexWriter writer = new IndexWriter(directory, conf);
      ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100);
      writer.setInfoStream(VERBOSE ? System.out : null);

      // have to use compound file to prevent running out of
      // descriptors when newDirectory returns a file-system
      // backed directory:
      ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true);

      for (int iter = 0; iter < 10; iter++) {
        if (VERBOSE) {
          System.out.println("TEST: iter=" + iter);
        }
        for (int j = 0; j < 199; j++) {
          idField.setValue(Integer.toString(iter*201+j));
          writer.addDocument(doc);
        }

        int delID = iter*199;
        for (int j = 0; j < 20; j++) {
          writer.deleteDocuments(new Term("id", Integer.toString(delID)));
          delID += 5;
        }

        // Force a bunch of merge threads to kick off so we
        // stress out aborting them on close:
        ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(2);

        final IndexWriter finalWriter = writer;
        final ArrayList<Throwable> failure = new ArrayList
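Several of the tests above (testCommitOnClose, testCommitOnCloseAbort, testCommitOnCloseOptimize) exercise the same guarantee: an open IndexReader is a point-in-time snapshot, and IndexWriter.close() performs the final commit that makes buffered changes visible to newly opened readers. Below is a minimal standalone sketch of that behavior. It assumes a Lucene 3.x classpath; Version.LUCENE_31, StandardAnalyzer, and RAMDirectory stand in for the test framework's TEST_VERSION_CURRENT, MockAnalyzer(random), and newDirectory(), which exist only inside the Lucene test harness.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class CommitOnCloseSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_31, new StandardAnalyzer(Version.LUCENE_31)));

    // Add one document and commit it, so there is a visible index.
    Document doc = new Document();
    doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.commit();

    // A reader opened now is a snapshot of the last commit ...
    IndexReader reader = IndexReader.open(dir, true);

    // ... so a document added afterwards is invisible to it, even
    // though the writer may already have flushed it internally.
    writer.addDocument(doc);
    System.out.println("before close: " + reader.numDocs()); // prints 1

    // close() commits; a freshly opened reader sees both documents.
    writer.close();
    IndexReader after = IndexReader.open(dir, true);
    System.out.println("after close:  " + after.numDocs()); // prints 2

    reader.close();
    after.close();
    dir.close();
  }
}

testCommitOnCloseAbort relies on the flip side of the same rule: because a reader only ever sees committed state, rollback() simply discards the uncommitted changes, and searches keep returning the pre-abort hit counts.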
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from pages under the /java/jwarehouse URI on this website is paid back to open source projects.