alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

What this is

This file is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Other links

The source code

/*
 * $Header: /cvsroot/mvnforum/mvnforum/src/com/mvnforum/search/post/PostIndexer.java,v 1.6.2.1 2005/05/20 08:52:56 minhnn Exp $
 * $Author: minhnn $
 * $Revision: 1.6.2.1 $
 * $Date: 2005/05/20 08:52:56 $
 *
 * ====================================================================
 *
 * Copyright (C) 2002-2005 by MyVietnam.net
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or any later version.
 *
 * All copyright notices regarding mvnForum MUST remain intact
 * in the scripts and in the outputted HTML.
 * The "powered by" text/logo with a link back to
 * http://www.mvnForum.com and http://www.MyVietnam.net in the
 * footer of the pages MUST remain visible when the pages
 * are viewed on the internet or intranet.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Support can be obtained from support forums at:
 * http://www.mvnForum.com/mvnforum/index
 *
 * Correspondence and Marketing Questions can be sent to:
 * info@MyVietnam.net
 *
 * @author: Minh Nguyen  minhnn@MyVietnam.net
 * @author: Dejan Krsmanovic dejan_krsmanovic@yahoo.com
 */
package com.mvnforum.search.post;

import java.io.IOException;

import com.mvnforum.MVNForumConfig;
import com.mvnforum.MVNForumFactoryConfig;
import com.mvnforum.db.PostBean;
import com.mvnforum.search.IntegerFilter;
import net.myvietnam.mvncore.exception.SearchException;
import net.myvietnam.mvncore.util.DateUtil;
import net.myvietnam.mvncore.util.TimerUtil;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;

/**
 * Static helper that maintains the Lucene index of forum posts.
 *
 * <p>It offers three groups of operations:
 * <ul>
 *   <li><code>schedule*Task</code> methods, which wrap an index operation in a
 *       task object and hand it to {@link TimerUtil} with a delay of 0;</li>
 *   <li>package-level methods that add posts to, or delete posts from, the
 *       index stored in the directory reported by
 *       {@link MVNForumConfig#getSearchPostIndexDir()};</li>
 *   <li>read-side accessors ({@link #getSearcher()}, {@link #getNumDocs()}).</li>
 * </ul>
 *
 * <p>All state is static and is set up once in the static initializer.
 */
public class PostIndexer
{
    private static final Log log = LogFactory.getLog(PostIndexer.class);

    //Field names (used for indexing)
    public static final String FIELD_POST_ID    = "postID";
    public static final String FIELD_THREAD_ID  = "threadID";
    public static final String FIELD_FORUM_ID   = "forumID";
    public static final String FIELD_MEMBER_ID  = "memberID";
    public static final String FIELD_POST_TOPIC = "postTopic";
    public static final String FIELD_POST_BODY  = "postBody";
    public static final String FIELD_POST_DATE  = "postDate";

    public static final String FIELD_WITH_ATTACHMENT = "withAttachment";

    public static final String FIELD_ATTACHMENT_COUNT = "attachmentCount";

    //public static final String PROPERTY_SEARCH_PATH      = "search.path";
    //public static final String PROPERTY_SEARCH_AUTOINDEX = "search.autoindex";

    /** Analyzer shared by every IndexWriter this class creates; chosen once at class load. */
    private static final Analyzer analyzer;

    /** Directory of the post index, read once from MVNForumConfig at class load. */
    private static final String searchPostIndexDir;

    /**
     * Time (System.currentTimeMillis()) of the last optimize() issued from
     * addPostToIndex. Starts at 0, so the first add after class load optimizes.
     */
    private static long lastOptimizeTime = 0;

    static {
        searchPostIndexDir = MVNForumConfig.getSearchPostIndexDir();
        analyzer = loadAnalyzer();
    }

    /**
     * Schedules the given post to be added to the index.
     * @param postBean the post to add
     */
    public static void scheduleAddPostTask(PostBean postBean) {
        AddUpdatePostIndexTask task = new AddUpdatePostIndexTask(postBean, AddUpdatePostIndexTask.OPERATION_ADD);
        TimerUtil.getInstance().schedule(task, 0);
    }

    /**
     * Schedules the index entry of the given post to be updated.
     * @param postBean the post to update
     */
    public static void scheduleUpdatePostTask(PostBean postBean) {
        AddUpdatePostIndexTask task = new AddUpdatePostIndexTask(postBean, AddUpdatePostIndexTask.OPERATION_UPDATE);
        TimerUtil.getInstance().schedule(task, 0);
    }

    /**
     * Schedules a delete task for the given object.
     * @param objectID id of the object to delete
     * @param objectType kind of object the id refers to; the accepted values
     *                   are defined by DeletePostIndexTask, not by this class
     */
    public static void scheduleDeletePostTask(int objectID, int objectType) {
        DeletePostIndexTask task = new DeletePostIndexTask(objectID, objectType);
        TimerUtil.getInstance().schedule(task, 0);
    }

    /**
     * Schedules an UpdateThreadTask for the given thread.
     * @param threadID id of the thread to update
     */
    public static void scheduleUpdateThreadTask(int threadID) {
        UpdateThreadTask task = new UpdateThreadTask(threadID);
        TimerUtil.getInstance().schedule(task, 0);
    }

    /**
     * Schedules a RebuildPostIndexTask, always constructed with a max post id of 0.
     */
    public static void scheduleRebuildIndexTask() {
        // NOTE(review): the meaning of 0 is defined by RebuildPostIndexTask - confirm there.
        int maxPostID = 0;
        RebuildPostIndexTask task = new RebuildPostIndexTask(maxPostID);
        TimerUtil.getInstance().schedule(task, 0);
    }

    /**
     * @return the analyzer selected at class load
     */
    static Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * This method selects the analyzer when the class is loaded. If no analyzer
     * class is configured, or the configured class cannot be loaded and
     * instantiated, a StandardAnalyzer is used instead.
     * @return the analyzer to use for indexing, never null
     */
    private static Analyzer loadAnalyzer() {
        String analyzerClassName = MVNForumFactoryConfig.getLuceneAnalyzerClassName();
        if ( (analyzerClassName == null) || (analyzerClassName.equals("")) ) {
            //nothing configured: use the standard analyzer
            log.debug("Using StandardAnalyzer for indexing");
            return new StandardAnalyzer();
        }

        //try to create the configured analyzer
        try {
            log.debug("About to load Analyzer [" + analyzerClassName + "] for indexing");
            return (Analyzer) Class.forName(analyzerClassName).newInstance();
        } catch (Exception e) {
            // Pass the exception to the logger so the reason for the fallback
            // (class not found, wrong type, no default constructor...) is visible.
            log.warn("Cannot load " + analyzerClassName + ". Loading StandardAnalyzer", e);
            return new StandardAnalyzer();
        }
    }

    /**
     * This method is used for getting a new IndexWriter. It can create a new index
     * or open the existing one. Creating a new index deletes the previous one, so
     * <code>create = true</code> should be used only for rebuilding the index.
     *
     * <p>When <code>create</code> is false and the existing index cannot be
     * opened, this method falls back to creating a new index.
     *
     * @param create - true if a new index should be created,
     *               - false for adding posts to the existing index
     * @return IndexWriter object that is used for adding posts to index, never null
     * @throws SearchException if the writer cannot be created
     */
    static IndexWriter getIndexWriter(boolean create) throws SearchException {
        if (!create) {
            try {
                IndexWriter writer = new IndexWriter(searchPostIndexDir, analyzer, false);
                writer.setUseCompoundFile(true);
                return writer;
            } catch (IOException e) {
                // NOTE(review): any IOException here, not only "index is missing",
                // leads to the index being recreated below. Kept as-is because
                // callers may rely on this recovery behaviour.
                log.warn("Cannot open existed index. New index will be created.", e);
            }
        }

        // We are here in two cases: a new index was requested, or the existing
        // index could not be opened.
        try {
            //This will create new index and delete existing
            IndexWriter writer = new IndexWriter(searchPostIndexDir, analyzer, true);
            writer.setUseCompoundFile(true);
            return writer;
        } catch (IOException e) {
            //@todo : localize me
            log.error("IOException during get index writer", e);
            throw new SearchException("Error while creating index writer");
        }
    }

    /**
     * This method is used for adding a single post to the index.
     * Note: this method doesn't close the writer.
     *
     * <p>A null post, or a post whose topic or body is null or empty, is
     * silently skipped.
     *
     * @param post A post that should be indexed
     * @param writer IndexWriter that is used for storing
     * @throws SearchException if the document cannot be written to the index
     */
    static void doIndexPost(PostBean post, IndexWriter writer) throws SearchException {

        if (post == null) return;
        //Post must include topic and body. If not then we have nothing to index.
        String topic = post.getPostTopic();
        String body = post.getPostBody();
        if ( (topic == null || topic.equals("")) ||
             (body == null || body.equals(""))) {
            return;
        }

        int attachCount = post.getPostAttachCount();

        //Each post will be represented as a document
        Document postDocument = new Document();
        //Document has following fields that could be queried on
        postDocument.add(Field.Keyword(FIELD_POST_ID, Integer.toString(post.getPostID())));
        postDocument.add(Field.Keyword(FIELD_THREAD_ID, Integer.toString(post.getThreadID())));
        postDocument.add(Field.Keyword(FIELD_FORUM_ID, Integer.toString(post.getForumID())));
        postDocument.add(Field.Keyword(FIELD_MEMBER_ID, Integer.toString(post.getMemberID())));
        // String.valueOf(boolean) yields "true"/"false" and exists on old JDKs too
        postDocument.add(Field.Keyword(FIELD_WITH_ATTACHMENT, String.valueOf(attachCount > 0)));
        postDocument.add(Field.Keyword(FIELD_ATTACHMENT_COUNT, IntegerFilter.intToString(attachCount)));

        //document body and title is not stored since we can retrieve them from database
        postDocument.add(Field.UnStored(FIELD_POST_TOPIC, topic));
        postDocument.add(Field.UnStored(FIELD_POST_BODY, body));
        //add date field
        postDocument.add(Field.Keyword(FIELD_POST_DATE, DateField.dateToString(post.getPostCreationDate())));

        //now we have created document with fields so we can store it
        try {
            writer.addDocument(postDocument);
        } catch (IOException e) {
            log.error("PostIndexer.doIndexPost failed", e);
            //@todo : localize me
            throw new SearchException("Error writing new post to index");
        }
    }

    /**
     * Add a single post to the index, then optimize the index if more than
     * DateUtil.HOUR has passed since the last optimize issued by this method.
     * The writer is always closed before returning.
     *
     * @param post the post to index
     * @throws SearchException if the writer cannot be obtained or the post
     *                         cannot be written
     * @throws IOException if optimizing the index fails
     */
    static void addPostToIndex(PostBean post) throws SearchException, IOException {
        // getIndexWriter either returns a writer or throws, so no null check is needed.
        IndexWriter writer = getIndexWriter(false);
        try {
            doIndexPost(post, writer);

            // now check if we should optimize index (each hour)
            long now = System.currentTimeMillis();
            if ((now - lastOptimizeTime) > DateUtil.HOUR) {
                log.debug("writer.optimize() called in addPostToIndex");
                writer.optimize();
                lastOptimizeTime = now;
            }
        } finally {
            try {
                writer.close();
            } catch (IOException e) {
                // Not rethrown (it could mask an exception from the try block),
                // but it must not vanish silently either.
                log.error("Error closing Lucene IndexWriter in addPostToIndex", e);
            }
        }
    }

    /**
     * This method is used for deleting post from index.
     * @param postID id of the post that should be deleted
     * @throws SearchException if the index cannot be read or modified
     */
    static void deletePostFromIndex(int postID) throws SearchException {
        //@todo : localize me
        deleteByField(FIELD_POST_ID, postID, "deletePostFromIndex",
                      "Error trying to delete post with postID = " + postID);
    }

    /**
     * This method is used for deleting all posts in a thread from index.
     * @param threadID id of the thread that should be deleted
     * @throws SearchException if the index cannot be read or modified
     */
    static void deleteThreadFromIndex(int threadID) throws SearchException {
        //@todo : localize me
        deleteByField(FIELD_THREAD_ID, threadID, "deleteThreadFromIndex",
                      "Error trying to delete posts in index with threadID = " + threadID);
    }

    /**
     * This method is used for deleting all posts in a forum from index.
     * @param forumID id of the forum that should be deleted
     * @throws SearchException if the index cannot be read or modified
     */
    static void deleteForumFromIndex(int forumID) throws SearchException {
        //@todo : localize me
        deleteByField(FIELD_FORUM_ID, forumID, "deleteForumFromIndex",
                      "Error trying to delete posts in index with forumID = " + forumID);
    }

    /**
     * Deletes every indexed document whose <code>fieldName</code> equals
     * <code>id</code>; shared implementation of the three delete methods above.
     *
     * @param fieldName one of the FIELD_*_ID constants
     * @param id value to match, converted with String.valueOf
     * @param callerName name of the public operation, used in log messages
     * @param errorMessage message of the SearchException thrown on failure
     * @throws SearchException if an IOException occurs while opening the
     *                         reader or deleting
     */
    private static void deleteByField(String fieldName, int id, String callerName, String errorMessage)
        throws SearchException {

        IndexReader reader = null;
        try {
            reader = IndexReader.open(searchPostIndexDir);
            if (reader == null) {
                log.warn("Cannot get the IndexReader");
                return;
            }

            Term term = new Term(fieldName, String.valueOf(id));
            int deletedCount = reader.delete(term);
            log.debug(callerName + ": deleted posts = " + deletedCount);
        } catch (IOException e) {
            // The SearchException below carries only the message, so record the
            // underlying cause here before it is lost.
            log.error(errorMessage, e);
            throw new SearchException(errorMessage);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    // NOTE(review): with this Lucene API generation a failed close
                    // may mean the deletions were not persisted - confirm.
                    log.warn("Error closing Lucene IndexReader in " + callerName, e);
                }
            }
        }
    }

    /**
     * Opens a new IndexSearcher on the post index. The caller owns the
     * returned searcher.
     *
     * @return a new searcher, never null
     * @throws IOException with a generic message if the index cannot be
     *                     opened; the original exception is only logged
     */
    public static IndexSearcher getSearcher() throws IOException {
        try {
            return new IndexSearcher(searchPostIndexDir);
        } catch (IOException ex) {
            // we throw new IOException because the original exception
            // contain sensitive directory information
            log.error("Cannot access the lucene search index for query. Please check if you have configed mvnForumHome properly. You can also go to Admin Zone to rebuild the Lucene index files.", ex);
            //@todo : localize me
            throw new IOException("Cannot access the lucene search index. Please report this error to web site Administrator (check mvnForumHome or rebuild Lucene index).");
        }
    }

    /**
     * Reports the number of documents in the post index.
     *
     * @return the document count, or -1 if the index cannot be read
     */
    public static int getNumDocs() {
        int numDocs = -1;
        IndexReader reader = null;
        try {
            reader = IndexReader.open(searchPostIndexDir);
            if (reader == null) {
                log.warn("Cannot get the IndexReader");
                return -1;
            }
            numDocs = reader.numDocs();
        } catch (IOException ioe) {
            // Still best-effort (-1 is returned), but leave a trace of the reason.
            log.warn("Cannot read the number of documents from the Lucene index", ioe);
        } finally {
            try {
                if (reader != null) reader.close();
            } catch (IOException e) {
                log.debug("Error closing Lucene IndexReader", e);
            }
        }
        return numDocs;
    }

}
... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.