|
Lucene example source code file (BooleanScorer.java)
The Lucene BooleanScorer.java source codepackage org.apache.lucene.search; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.List; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.BooleanClause.Occur; /* Description from Doug Cutting (excerpted from * LUCENE-1483): * * BooleanScorer uses a ~16k array to score windows of * docs. So it scores docs 0-16k first, then docs 16-32k, * etc. For each window it iterates through all query terms * and accumulates a score in table[doc%16k]. It also stores * in the table a bitmask representing which terms * contributed to the score. Non-zero scores are chained in * a linked list. At the end of scoring each window it then * iterates through the linked list and, if the bitmask * matches the boolean constraints, collects a hit. For * boolean queries with lots of frequent terms this can be * much faster, since it does not need to update a priority * queue for each posting, instead performing constant-time * operations per posting. The only downside is that it * results in hits being delivered out-of-order within the * window, which means it cannot be nested within other * scorers. But it works well as a top-level scorer. * * The new BooleanScorer2 implementation instead works by * merging priority queues of postings, albeit with some * clever tricks. For example, a pure conjunction (all terms * required) does not require a priority queue. Instead it * sorts the posting streams at the start, then repeatedly * skips the first to to the last. If the first ever equals * the last, then there's a hit. When some terms are * required and some terms are optional, the conjunction can * be evaluated first, then the optional terms can all skip * to the match and be added to the score. Thus the * conjunction can reduce the number of priority queue * updates for the optional terms. */ final class BooleanScorer extends Scorer { private static final class BooleanScorerCollector extends Collector { private BucketTable bucketTable; private int mask; private Scorer scorer; public BooleanScorerCollector(int mask, BucketTable bucketTable) { this.mask = mask; this.bucketTable = bucketTable; } @Override public void collect(final int doc) throws IOException { final BucketTable table = bucketTable; final int i = doc & BucketTable.MASK; Bucket bucket = table.buckets[i]; if (bucket == null) table.buckets[i] = bucket = new Bucket(); if (bucket.doc != doc) { // invalid bucket bucket.doc = doc; // set doc bucket.score = scorer.score(); // initialize score bucket.bits = mask; // initialize mask bucket.coord = 1; // initialize coord bucket.next = table.first; // push onto valid list table.first = bucket; } else { // valid bucket bucket.score += scorer.score(); // increment score bucket.bits |= mask; // add bits in mask bucket.coord++; // increment coord } } @Override public void setNextReader(IndexReader reader, int docBase) { // not needed by this implementation } @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } @Override public boolean acceptsDocsOutOfOrder() { return true; } } // An internal class which is used in score(Collector, int) for setting the // current score. This is required since Collector exposes a setScorer method // and implementations that need the score will call scorer.score(). // Therefore the only methods that are implemented are score() and doc(). private static final class BucketScorer extends Scorer { float score; int doc = NO_MORE_DOCS; int freq; public BucketScorer(Weight weight) { super(weight); } @Override public int advance(int target) throws IOException { return NO_MORE_DOCS; } @Override public int docID() { return doc; } @Override public float freq() { return freq; } @Override public int nextDoc() throws IOException { return NO_MORE_DOCS; } @Override public float score() throws IOException { return score; } } static final class Bucket { int doc = -1; // tells if bucket is valid float score; // incremental score int bits; // used for bool constraints int coord; // count of terms in score Bucket next; // next valid bucket } /** A simple hash table of document scores within a range. */ static final class BucketTable { public static final int SIZE = 1 << 11; public static final int MASK = SIZE - 1; final Bucket[] buckets = new Bucket[SIZE]; Bucket first = null; // head of valid list public BucketTable() {} public Collector newCollector(int mask) { return new BooleanScorerCollector(mask, this); } public int size() { return SIZE; } } static final class SubScorer { public Scorer scorer; // TODO: re-enable this if BQ ever sends us required clauses //public boolean required = false; public boolean prohibited = false; public Collector collector; public SubScorer next; public SubScorer(Scorer scorer, boolean required, boolean prohibited, Collector collector, SubScorer next) throws IOException { if (required) { throw new IllegalArgumentException("this scorer cannot handle required=true"); } this.scorer = scorer; // TODO: re-enable this if BQ ever sends us required clauses //this.required = required; this.prohibited = prohibited; this.collector = collector; this.next = next; } } private SubScorer scorers = null; private BucketTable bucketTable = new BucketTable(); private final float[] coordFactors; // TODO: re-enable this if BQ ever sends us required clauses //private int requiredMask = 0; private int prohibitedMask = 0; private int nextMask = 1; private final int minNrShouldMatch; private int end; private Bucket current; private int doc = -1; BooleanScorer(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch, List<Scorer> optionalScorers, List Other Lucene examples (source code examples)Here is a short list of links related to this Lucene BooleanScorer.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.