|
Lucene example source code file (TermInfosWriter.java)
The Lucene TermInfosWriter.java source codepackage org.apache.lucene.index; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.Closeable; import java.io.IOException; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.Directory; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.ArrayUtil; /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a Directory. A TermInfos can be written once, in order. */ final class TermInfosWriter implements Closeable { /** The file format version, a negative number. */ public static final int FORMAT = -3; // Changed strings to true utf8 with length-in-bytes not // length-in-chars public static final int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = -4; // NOTE: always change this if you switch to a new format! public static final int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; private FieldInfos fieldInfos; private IndexOutput output; private TermInfo lastTi = new TermInfo(); private long size; // TODO: the default values for these two parameters should be settable from // IndexWriter. However, once that's done, folks will start setting them to // ridiculous values and complaining that things don't work well, as with // mergeFactor. So, let's wait until a number of folks find that alternate // values work better. Note that both of these values are stored in the // segment, so that it's safe to change these w/o rebuilding all indexes. /** Expert: The fraction of terms in the "dictionary" which should be stored * in RAM. Smaller values use more memory, but make searching slightly * faster, while larger values use less memory and make searching slightly * slower. Searching is typically not dominated by dictionary lookup, so * tweaking this is rarely useful.*/ int indexInterval = 128; /** Expert: The fraction of {@link TermDocs} entries stored in skip tables, * used to accelerate {@link TermDocs#skipTo(int)}. Larger values result in * smaller indexes, greater acceleration, but fewer accelerable cases, while * smaller values result in bigger indexes, less acceleration and more * accelerable cases. More detailed experiments would be useful here. */ int skipInterval = 16; /** Expert: The maximum number of skip levels. Smaller values result in * slightly smaller indexes, but slower skipping in big posting lists. */ int maxSkipLevels = 10; private long lastIndexPointer; private boolean isIndex; private byte[] lastTermBytes = new byte[10]; private int lastTermBytesLength = 0; private int lastFieldNumber = -1; private TermInfosWriter other; private UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result(); TermInfosWriter(Directory directory, String segment, FieldInfos fis, int interval) throws IOException { initialize(directory, segment, fis, interval, false); boolean success = false; try { other = new TermInfosWriter(directory, segment, fis, interval, true); other.other = this; success = true; } finally { if (!success) { IOUtils.closeSafely(true, output, other); } } } private TermInfosWriter(Directory directory, String segment, FieldInfos fis, int interval, boolean isIndex) throws IOException { initialize(directory, segment, fis, interval, isIndex); } private void initialize(Directory directory, String segment, FieldInfos fis, int interval, boolean isi) throws IOException { indexInterval = interval; fieldInfos = fis; isIndex = isi; output = directory.createOutput(segment + (isIndex ? ".tii" : ".tis")); boolean success = false; try { output.writeInt(FORMAT_CURRENT); // write format output.writeLong(0); // leave space for size output.writeInt(indexInterval); // write indexInterval output.writeInt(skipInterval); // write skipInterval output.writeInt(maxSkipLevels); // write maxSkipLevels assert initUTF16Results(); success = true; } finally { if (!success) { IOUtils.closeSafely(true, output); } } } void add(Term term, TermInfo ti) throws IOException { UnicodeUtil.UTF16toUTF8(term.text, 0, term.text.length(), utf8Result); add(fieldInfos.fieldNumber(term.field), utf8Result.result, utf8Result.length, ti); } // Currently used only by assert statements UnicodeUtil.UTF16Result utf16Result1; UnicodeUtil.UTF16Result utf16Result2; // Currently used only by assert statements private boolean initUTF16Results() { utf16Result1 = new UnicodeUtil.UTF16Result(); utf16Result2 = new UnicodeUtil.UTF16Result(); return true; } // Currently used only by assert statement private int compareToLastTerm(int fieldNumber, byte[] termBytes, int termBytesLength) { if (lastFieldNumber != fieldNumber) { final int cmp = fieldInfos.fieldName(lastFieldNumber).compareTo(fieldInfos.fieldName(fieldNumber)); // If there is a field named "" (empty string) then we // will get 0 on this comparison, yet, it's "OK". But // it's not OK if two different field numbers map to // the same name. if (cmp != 0 || lastFieldNumber != -1) return cmp; } UnicodeUtil.UTF8toUTF16(lastTermBytes, 0, lastTermBytesLength, utf16Result1); UnicodeUtil.UTF8toUTF16(termBytes, 0, termBytesLength, utf16Result2); final int len; if (utf16Result1.length < utf16Result2.length) len = utf16Result1.length; else len = utf16Result2.length; for(int i=0;i<len;i++) { final char ch1 = utf16Result1.result[i]; final char ch2 = utf16Result2.result[i]; if (ch1 != ch2) return ch1-ch2; } return utf16Result1.length - utf16Result2.length; } /** Adds a new < Other Lucene examples (source code examples)Here is a short list of links related to this Lucene TermInfosWriter.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.