|
Lucene example source code file (NearSpansUnordered.java)
This example Lucene source code file (NearSpansUnordered.java) is included in the DevDaily.com
"Java Source Code Warehouse" project. The intent of this project is to help you
"Learn Java by Example" (TM).
The Lucene NearSpansUnordered.java source code
package org.apache.lucene.search.spans;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.PriorityQueue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.HashSet;
/**
 * Similar to {@link NearSpansOrdered}, but for the unordered case.
 * <p>
 * Every clause's {@link Spans} is wrapped in a {@code SpansCell}. The cells are
 * held in two structures at once: a singly linked list ({@code first} ..
 * {@code last}) that is used to advance all clauses onto a common document, and
 * a priority queue whose top is the current minimum span. A match is reported
 * when the minimum and maximum cells are on the same document and
 * {@code max.end() - min.start() - totalLength <= slop}.
 * <p>
 * Expert:
 * Only public for subclassing. Most implementations should not need this class
 */
public class NearSpansUnordered extends Spans {
  private final SpanNearQuery query;

  private final List<SpansCell> ordered = new ArrayList<SpansCell>(); // spans in query order
  private final Spans[] subSpans;
  private final int slop;                       // from query

  private SpansCell first;                      // linked list of spans
  private SpansCell last;                       // sorted by doc only

  private int totalLength;                      // sum of current lengths

  private final CellQueue queue;                // sorted queue of spans
  private SpansCell max;                        // max element in queue

  private boolean more = true;                  // true iff not done
  private boolean firstTime = true;             // true before first next()

  /** Priority queue of cells ordered by document, then by span order within a document. */
  private class CellQueue extends PriorityQueue<SpansCell> {
    public CellQueue(int size) {
      initialize(size);
    }

    @Override
    protected final boolean lessThan(SpansCell spans1, SpansCell spans2) {
      if (spans1.doc() == spans2.doc()) {
        // same document: defer to the ordering used by NearSpansOrdered
        return NearSpansOrdered.docSpansOrdered(spans1, spans2);
      } else {
        return spans1.doc() < spans2.doc();
      }
    }
  }

  /** Wraps a Spans, and can be used to form a linked list. */
  private class SpansCell extends Spans {
    private final Spans spans;
    private SpansCell next;
    private int length = -1;                    // -1 until the wrapped spans has been positioned
    private final int index;

    public SpansCell(Spans spans, int index) {
      this.spans = spans;
      this.index = index;
    }

    @Override
    public boolean next() throws IOException {
      return adjust(spans.next());
    }

    @Override
    public boolean skipTo(int target) throws IOException {
      return adjust(spans.skipTo(target));
    }

    /**
     * Updates the enclosing instance's bookkeeping ({@code totalLength},
     * {@code max}, {@code more}) after the wrapped spans has been moved.
     *
     * @param condition the result of the move on the wrapped spans
     * @return {@code condition}, unchanged
     */
    private boolean adjust(boolean condition) {
      if (length != -1) {
        totalLength -= length;                  // subtract old length
      }
      if (condition) {
        length = end() - start();
        totalLength += length;                  // add new length

        // this cell becomes the maximum if it is on a later doc, or on the
        // same doc with a later end
        if (max == null || doc() > max.doc()
            || ((doc() == max.doc()) && (end() > max.end()))) {
          max = this;
        }
      }
      more = condition;
      return condition;
    }

    @Override
    public int doc() { return spans.doc(); }

    @Override
    public int start() { return spans.start(); }

    @Override
    public int end() { return spans.end(); }

    // TODO: Remove warning after API has been finalized
    /** Returns a fresh list holding the payloads of the wrapped spans. */
    @Override
    public Collection<byte[]> getPayload() throws IOException {
      return new ArrayList<byte[]>(spans.getPayload());
    }

    // TODO: Remove warning after API has been finalized
    @Override
    public boolean isPayloadAvailable() {
      return spans.isPayloadAvailable();
    }

    @Override
    public String toString() { return spans.toString() + "#" + index; }
  }

  /**
   * Creates one cell per clause of the query, in query order.
   *
   * @param query the near query supplying the clauses and the slop
   * @param reader the reader each clause's spans is obtained from
   * @throws IOException if obtaining a clause's spans fails
   */
  public NearSpansUnordered(SpanNearQuery query, IndexReader reader)
      throws IOException {
    this.query = query;
    this.slop = query.getSlop();

    SpanQuery[] clauses = query.getClauses();
    queue = new CellQueue(clauses.length);
    subSpans = new Spans[clauses.length];
    for (int i = 0; i < clauses.length; i++) {
      SpansCell cell =
        new SpansCell(clauses[i].getSpans(reader), i);
      ordered.add(cell);
      subSpans[i] = cell.spans;
    }
  }

  /**
   * Returns the unwrapped spans of the clauses, in query order. The returned
   * array is the internal one, not a copy.
   */
  public Spans[] getSubSpans() {
    return subSpans;
  }

  /**
   * Moves to the next match.
   *
   * @return true if a match was found, false once the spans are exhausted
   * @throws IOException if advancing a clause's spans fails
   */
  @Override
  public boolean next() throws IOException {
    if (firstTime) {
      initList(true);
      listToQueue();                            // initialize queue
      firstTime = false;
    } else if (more) {
      if (min().next()) {                       // trigger further scanning
        queue.updateTop();                      // maintain queue
      } else {
        more = false;
      }
    }

    while (more) {

      boolean queueStale = false;

      if (min().doc() != max.doc()) {           // maintain list
        queueToList();
        queueStale = true;
      }

      // skip to doc w/ all clauses

      while (more && first.doc() < last.doc()) {
        more = first.skipTo(last.doc());        // skip first upto last
        firstToLast();                          // and move it to the end
        queueStale = true;
      }

      if (!more) return false;

      // found doc w/ all clauses

      if (queueStale) {                         // maintain the queue
        listToQueue();
      }

      if (atMatch()) {
        return true;
      }

      more = min().next();
      if (more) {
        queue.updateTop();                      // maintain queue
      }
    }
    return false;                               // no more matches
  }

  /**
   * Skips to the first match on a document at or beyond {@code target}.
   *
   * @param target the minimum document to land on
   * @return true if a match was found, false once the spans are exhausted
   * @throws IOException if advancing a clause's spans fails
   */
  @Override
  public boolean skipTo(int target) throws IOException {
    if (firstTime) {                            // initialize
      initList(false);
      for (SpansCell cell = first; more && cell != null; cell = cell.next) {
        more = cell.skipTo(target);             // skip all
      }
      if (more) {
        listToQueue();
      }
      firstTime = false;
    } else {                                    // normal case
      while (more && min().doc() < target) {    // skip as needed
        if (min().skipTo(target)) {
          queue.updateTop();
        } else {
          more = false;
        }
      }
    }
    return more && (atMatch() || next());
  }

  /** Returns the cell at the top of the queue. */
  private SpansCell min() { return queue.top(); }

  @Override
  public int doc() { return min().doc(); }

  @Override
  public int start() { return min().start(); }

  @Override
  public int end() { return max.end(); }

  // TODO: Remove warning after API has been finalized
  /**
   * WARNING: The List is not necessarily in order of the positions.
   * The result is a set of arrays, so two distinct arrays with equal contents
   * are both retained.
   *
   * @return Collection of <code>byte[]</code> payloads
   * @throws IOException if reading a payload from a clause's spans fails
   */
  @Override
  public Collection<byte[]> getPayload() throws IOException {
    Set<byte[]> matchPayload = new HashSet<byte[]>();
    for (SpansCell cell = first; cell != null; cell = cell.next) {
      if (cell.isPayloadAvailable()) {
        matchPayload.addAll(cell.getPayload());
      }
    }
    return matchPayload;
  }

  // TODO: Remove warning after API has been finalized
  /**
   * Returns true if any clause's spans has a payload available.
   * <p>
   * The walk starts at the head of the linked list, exactly as
   * {@link #getPayload()} does. The queue minimum is not necessarily the list
   * head (the list is reordered by {@code firstToLast()} before the queue is
   * rebuilt), so starting at the minimum could skip cells.
   */
  @Override
  public boolean isPayloadAvailable() {
    for (SpansCell cell = first; cell != null; cell = cell.next) {
      if (cell.isPayloadAvailable()) {
        return true;
      }
    }
    return false;
  }

  @Override
  public String toString() {
    return getClass().getName() + "("+query.toString()+")@"+
      (firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
  }

  /**
   * Builds the linked list from the cells in query order, stopping at the
   * first cell that cannot be positioned.
   *
   * @param next if true, each cell is advanced to its first span before
   *             being added; if false, cells are added without being moved
   */
  private void initList(boolean next) throws IOException {
    for (int i = 0; more && i < ordered.size(); i++) {
      SpansCell cell = ordered.get(i);
      if (next)
        more = cell.next();                     // move to first entry
      if (more) {
        addToList(cell);                        // add to list
      }
    }
  }

  /** Appends {@code cell} to the end of the linked list. */
  private void addToList(SpansCell cell) {
    if (last != null) {                         // add next to end of list
      last.next = cell;
    } else
      first = cell;
    last = cell;
    cell.next = null;
  }

  /** Moves the head of the linked list to its tail. */
  private void firstToLast() {
    last.next = first;                          // move first to end of list
    last = first;
    first = first.next;
    last.next = null;
  }

  /** Empties the queue into a fresh linked list, in queue order. */
  private void queueToList() {
    last = first = null;
    while (queue.top() != null) {
      addToList(queue.pop());
    }
  }

  /** Rebuilds the queue from the cells currently in the linked list. */
  private void listToQueue() {
    queue.clear();                              // rebuild queue
    for (SpansCell cell = first; cell != null; cell = cell.next) {
      queue.add(cell);                          // add to queue from list
    }
  }

  /**
   * Returns true if all cells are on the same document and the distance from
   * the minimum start to the maximum end, less the summed span lengths, is
   * within the slop.
   */
  private boolean atMatch() {
    return (min().doc() == max.doc())
        && ((max.end() - min().start() - totalLength) <= slop);
  }
}
Other Lucene examples (source code examples)
Here is a short list of links related to this Lucene NearSpansUnordered.java source code file:
|