|
Lucene example source code file (PatternParser.java)
The Lucene PatternParser.java source code
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.compound.hyphenation;

// SAX
import org.xml.sax.XMLReader;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.Attributes;

// Java
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import javax.xml.parsers.SAXParserFactory;

/**
 * A SAX document handler to read and parse hyphenation patterns from an XML
 * file.
 *
 * This class has been taken from the Apache FOP project
 * (http://xmlgraphics.apache.org/fop/). It has been slightly modified.
*/ public class PatternParser extends DefaultHandler implements PatternConsumer { XMLReader parser; int currElement; PatternConsumer consumer; StringBuilder token; ArrayList<Object> exception; char hyphenChar; String errMsg; static final int ELEM_CLASSES = 1; static final int ELEM_EXCEPTIONS = 2; static final int ELEM_PATTERNS = 3; static final int ELEM_HYPHEN = 4; public PatternParser() throws HyphenationException { token = new StringBuilder(); parser = createParser(); parser.setContentHandler(this); parser.setErrorHandler(this); parser.setEntityResolver(this); hyphenChar = '-'; // default } public PatternParser(PatternConsumer consumer) throws HyphenationException { this(); this.consumer = consumer; } public void setConsumer(PatternConsumer consumer) { this.consumer = consumer; } /** * Parses a hyphenation pattern file. * * @param filename the filename * @throws HyphenationException In case of an exception while parsing */ public void parse(String filename) throws HyphenationException { parse(new InputSource(filename)); } /** * Parses a hyphenation pattern file. * * @param file the pattern file * @throws HyphenationException In case of an exception while parsing */ public void parse(File file) throws HyphenationException { try { InputSource src = new InputSource(file.toURL().toExternalForm()); parse(src); } catch (MalformedURLException e) { throw new HyphenationException("Error converting the File '" + file + "' to a URL: " + e.getMessage()); } } /** * Parses a hyphenation pattern file. 
* * @param source the InputSource for the file * @throws HyphenationException In case of an exception while parsing */ public void parse(InputSource source) throws HyphenationException { try { parser.parse(source); } catch (FileNotFoundException fnfe) { throw new HyphenationException("File not found: " + fnfe.getMessage()); } catch (IOException ioe) { throw new HyphenationException(ioe.getMessage()); } catch (SAXException e) { throw new HyphenationException(errMsg); } } /** * Creates a SAX parser using JAXP * * @return the created SAX parser */ static XMLReader createParser() { try { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); return factory.newSAXParser().getXMLReader(); } catch (Exception e) { throw new RuntimeException("Couldn't create XMLReader: " + e.getMessage()); } } protected String readToken(StringBuffer chars) { String word; boolean space = false; int i; for (i = 0; i < chars.length(); i++) { if (Character.isWhitespace(chars.charAt(i))) { space = true; } else { break; } } if (space) { // chars.delete(0,i); for (int countr = i; countr < chars.length(); countr++) { chars.setCharAt(countr - i, chars.charAt(countr)); } chars.setLength(chars.length() - i); if (token.length() > 0) { word = token.toString(); token.setLength(0); return word; } } space = false; for (i = 0; i < chars.length(); i++) { if (Character.isWhitespace(chars.charAt(i))) { space = true; break; } } token.append(chars.toString().substring(0, i)); // chars.delete(0,i); for (int countr = i; countr < chars.length(); countr++) { chars.setCharAt(countr - i, chars.charAt(countr)); } chars.setLength(chars.length() - i); if (space) { word = token.toString(); token.setLength(0); return word; } token.append(chars); return null; } protected static String getPattern(String word) { StringBuilder pat = new StringBuilder(); int len = word.length(); for (int i = 0; i < len; i++) { if (!Character.isDigit(word.charAt(i))) { pat.append(word.charAt(i)); } } return 
pat.toString(); } protected ArrayList<Object> normalizeException(ArrayList ex) { ArrayList<Object> res = new ArrayList |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.