|
Lucene example source code file (PatternParser.java)
The Lucene PatternParser.java source code
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.compound.hyphenation;
// SAX
import org.xml.sax.XMLReader;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.Attributes;
// Java
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import javax.xml.parsers.SAXParserFactory;
/**
* A SAX document handler to read and parse hyphenation patterns from a XML
* file.
*
* This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
*/
public class PatternParser extends DefaultHandler implements PatternConsumer {
XMLReader parser;
int currElement;
PatternConsumer consumer;
StringBuilder token;
ArrayList<Object> exception;
char hyphenChar;
String errMsg;
static final int ELEM_CLASSES = 1;
static final int ELEM_EXCEPTIONS = 2;
static final int ELEM_PATTERNS = 3;
static final int ELEM_HYPHEN = 4;
public PatternParser() throws HyphenationException {
token = new StringBuilder();
parser = createParser();
parser.setContentHandler(this);
parser.setErrorHandler(this);
parser.setEntityResolver(this);
hyphenChar = '-'; // default
}
public PatternParser(PatternConsumer consumer) throws HyphenationException {
this();
this.consumer = consumer;
}
public void setConsumer(PatternConsumer consumer) {
this.consumer = consumer;
}
/**
* Parses a hyphenation pattern file.
*
* @param filename the filename
* @throws HyphenationException In case of an exception while parsing
*/
public void parse(String filename) throws HyphenationException {
parse(new InputSource(filename));
}
/**
* Parses a hyphenation pattern file.
*
* @param file the pattern file
* @throws HyphenationException In case of an exception while parsing
*/
public void parse(File file) throws HyphenationException {
try {
InputSource src = new InputSource(file.toURL().toExternalForm());
parse(src);
} catch (MalformedURLException e) {
throw new HyphenationException("Error converting the File '" + file
+ "' to a URL: " + e.getMessage());
}
}
/**
* Parses a hyphenation pattern file.
*
* @param source the InputSource for the file
* @throws HyphenationException In case of an exception while parsing
*/
public void parse(InputSource source) throws HyphenationException {
try {
parser.parse(source);
} catch (FileNotFoundException fnfe) {
throw new HyphenationException("File not found: " + fnfe.getMessage());
} catch (IOException ioe) {
throw new HyphenationException(ioe.getMessage());
} catch (SAXException e) {
throw new HyphenationException(errMsg);
}
}
/**
* Creates a SAX parser using JAXP
*
* @return the created SAX parser
*/
static XMLReader createParser() {
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
return factory.newSAXParser().getXMLReader();
} catch (Exception e) {
throw new RuntimeException("Couldn't create XMLReader: " + e.getMessage());
}
}
protected String readToken(StringBuffer chars) {
String word;
boolean space = false;
int i;
for (i = 0; i < chars.length(); i++) {
if (Character.isWhitespace(chars.charAt(i))) {
space = true;
} else {
break;
}
}
if (space) {
// chars.delete(0,i);
for (int countr = i; countr < chars.length(); countr++) {
chars.setCharAt(countr - i, chars.charAt(countr));
}
chars.setLength(chars.length() - i);
if (token.length() > 0) {
word = token.toString();
token.setLength(0);
return word;
}
}
space = false;
for (i = 0; i < chars.length(); i++) {
if (Character.isWhitespace(chars.charAt(i))) {
space = true;
break;
}
}
token.append(chars.toString().substring(0, i));
// chars.delete(0,i);
for (int countr = i; countr < chars.length(); countr++) {
chars.setCharAt(countr - i, chars.charAt(countr));
}
chars.setLength(chars.length() - i);
if (space) {
word = token.toString();
token.setLength(0);
return word;
}
token.append(chars);
return null;
}
protected static String getPattern(String word) {
StringBuilder pat = new StringBuilder();
int len = word.length();
for (int i = 0; i < len; i++) {
if (!Character.isDigit(word.charAt(i))) {
pat.append(word.charAt(i));
}
}
return pat.toString();
}
protected ArrayList<Object> normalizeException(ArrayList> ex) {
ArrayList<Object> res = new ArrayList
Other Lucene examples (source code examples)Here is a short list of links related to this Lucene PatternParser.java source code file: |
Other websites by Alvin Alexander:
Life/living in Alaska (OneMansAlaska.com)
How I Sold My Business (HowISoldMyBusiness.com)
Copyright 1998-2011 Alvin Alexander, devdaily.com
All Rights Reserved.