|
Lucene example source code file (ContentSource.java)
The Lucene ContentSource.java source code
package org.apache.lucene.benchmark.byTask.feeds;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;

import org.apache.lucene.benchmark.byTask.utils.Config;

/**
 * Represents content from a specified source, such as TREC, Reuters etc. A
 * {@link ContentSource} is responsible for creating {@link DocData} objects for
 * its documents to be consumed by {@link DocMaker}. It also keeps track
 * of various statistics, such as how many documents were generated, size in
 * bytes etc.
 * <p>
 * Supports the following configuration parameters:
 * <ul>
 * <li><b>content.source.forever</b> - specifies whether to generate documents
 * forever (<b>default=true</b>).
 * <li><b>content.source.verbose</b> - specifies whether messages should be
 * output by the content source (<b>default=false</b>).
 * <li><b>content.source.encoding</b> - specifies which encoding to use when
 * reading the files of that content source. Certain implementations may define
 * a default value if this parameter is not specified. (<b>default=null</b>).
 * <li><b>content.source.log.step</b> - specifies for how many documents a
 * message should be logged. If set to 0 it means no logging should occur.
 * <b>NOTE:</b> if verbose is set to false, logging should not occur even if
 * logStep is not 0 (<b>default=0</b>).
 * </ul>
 */
public abstract class ContentSource {

  /** Bytes generated since the last {@link #resetInputs()}. */
  private long bytesCount;
  /** Bytes generated overall; not cleared by {@link #resetInputs()}. */
  private long totalBytesCount;
  /** Documents generated since the last {@link #resetInputs()}. */
  private int docsCount;
  /** Documents generated overall; not cleared by {@link #resetInputs()}. */
  private int totalDocsCount;
  /** The configuration handed to {@link #setConfig(Config)}. */
  private Config config;

  /** Value of <code>content.source.forever</code>. */
  protected boolean forever;
  /** Value of <code>content.source.log.step</code>. */
  protected int logStep;
  /** Value of <code>content.source.verbose</code>. */
  protected boolean verbose;
  /** Value of <code>content.source.encoding</code>; may be null. */
  protected String encoding;

  /**
   * Updates the count of bytes generated by this source. Both the
   * since-last-reset counter and the total counter are increased.
   *
   * @param numBytes number of bytes to add
   */
  protected final synchronized void addBytes(long numBytes) {
    bytesCount += numBytes;
    totalBytesCount += numBytes;
  }

  /**
   * Updates the count of documents generated by this source. Both the
   * since-last-reset counter and the total counter are increased by one.
   */
  protected final synchronized void addDoc() {
    ++docsCount;
    ++totalDocsCount;
  }

  /**
   * A convenience method for collecting all the files of a content source from
   * a given directory. The collected {@link File} instances are stored in the
   * given <code>files</code>.
   * <p>
   * Entries are visited in the order produced by {@link Arrays#sort(Object[])}
   * on the directory listing, and sub-directories are descended into
   * recursively. Unreadable directories and unreadable files are skipped.
   *
   * @param dir the directory to scan
   * @param files the list that readable files are appended to
   */
  protected final void collectFiles(File dir, ArrayList<File> files) {
    if (!dir.canRead()) {
      return;
    }

    File[] dirFiles = dir.listFiles();
    if (dirFiles == null) {
      // listFiles() returns null when 'dir' is not a directory or an I/O error
      // occurs; there is nothing to collect, and sorting null would throw.
      return;
    }

    Arrays.sort(dirFiles);
    for (File file : dirFiles) {
      if (file.isDirectory()) {
        collectFiles(file, files);
      } else if (file.canRead()) {
        files.add(file);
      }
    }
  }

  /**
   * Returns true whether it's time to log a message (depending on verbose and
   * the number of documents generated).
   *
   * @return true if verbose is set, logStep is positive and the number of
   *         documents generated since the last reset is a multiple of logStep
   */
  protected final boolean shouldLog() {
    return verbose && logStep > 0 && docsCount % logStep == 0;
  }

  /** Called when reading from this content source is no longer required. */
  public abstract void close() throws IOException;

  /** Returns the number of bytes generated since last reset. */
  public final long getBytesCount() {
    return bytesCount;
  }

  /** Returns the number of generated documents since last reset. */
  public final int getDocsCount() {
    return docsCount;
  }

  /** Returns the {@link Config} that was given to {@link #setConfig(Config)}. */
  public final Config getConfig() {
    return config;
  }

  /** Returns the next {@link DocData} from the content source. */
  public abstract DocData getNextDocData(DocData docData)
      throws NoMoreDataException, IOException;

  /** Returns the total number of bytes that were generated by this source. */
  public final long getTotalBytesCount() {
    return totalBytesCount;
  }

  /** Returns the total number of generated documents. */
  public final int getTotalDocsCount() {
    return totalDocsCount;
  }

  /**
   * Resets the input for this content source, so that the test would behave as
   * if it was just started, input-wise.
   * <p>
   * <b>NOTE:</b> the default implementation resets the number of bytes and
   * documents generated since the last reset, so it's important to call
   * super.resetInputs in case you override this method.
   */
  public void resetInputs() throws IOException {
    bytesCount = 0;
    docsCount = 0;
  }

  /**
   * Sets the {@link Config} for this content source. If you override this
   * method, you must call super.setConfig.
   *
   * @param config the configuration to read the content.source.* parameters from
   */
  public void setConfig(Config config) {
    this.config = config;
    forever = config.get("content.source.forever", true);
    logStep = config.get("content.source.log.step", 0);
    verbose = config.get("content.source.verbose", false);
    encoding = config.get("content.source.encoding", null);
  }

}
Other Lucene examples (source code examples)Here is a short list of links related to this Lucene ContentSource.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.