|
Lucene example source code file (build.xml)
The Lucene build.xml source code
<?xml version="1.0"?>
<project name="benchmark" default="default">
  <description>
    Lucene Benchmarking Contributions
  </description>

  <import file="../contrib-build.xml"/>

  <!-- Directory that receives the expanded / extracted benchmark data sets. -->
  <property name="working.dir" location="work"/>

  <!-- the tests have some parallel problems -->
  <property name="tests.threadspercpu" value="0"/>

  <!-- Up-to-date checks for the contrib modules referenced by the classpaths below. -->
  <contrib-uptodate name="highlighter"
                    property="highlighter.uptodate"
                    classpath.property="highlighter.jar"/>
  <contrib-uptodate name="icu"
                    property="icu.uptodate"
                    classpath.property="icu.jar"/>
  <!-- analyzers common needs a hack for the jar file: -->
  <contrib-uptodate name="analyzers/common"
                    jarfile="${common.dir}/build/contrib/analyzers/common/lucene-analyzers-${version}.jar"
                    property="analyzers-common.uptodate"
                    classpath.property="analyzers-common.jar"/>
  <contrib-uptodate name="memory"
                    property="memory.uptodate"
                    classpath.property="memory.jar"/>

  <!--
    Probes for every downloaded archive (*.exists / *.present) and every
    expanded or extracted data set (*.expanded / *.extracted). The get-* and
    expand-* targets below are guarded by these properties through "unless".
  -->
  <target name="check-files">
    <available file="temp/news20.tar.gz" property="news20.exists"/>
    <available file="${working.dir}/20_newsgroup" property="news20.expanded"/>

    <available file="temp/reuters21578.tar.gz" property="reuters.exists"/>
    <available file="${working.dir}/reuters" property="reuters.expanded"/>
    <available file="${working.dir}/reuters-out" property="reuters.extracted"/>

    <available file="temp/20news-18828.tar.gz" property="20news-18828.exists"/>
    <available file="${working.dir}/20news-18828" property="20news-18828.expanded"/>

    <!-- mini.exists was never set although get-mini-news is guarded by it. -->
    <available file="temp/mini_newsgroups.tar.gz" property="mini.exists"/>
    <available file="${working.dir}/mini_newsgroups" property="mini.expanded"/>

    <available file="temp/enwiki-20070527-pages-articles.xml.bz2" property="enwiki.exists"/>
    <available file="temp/enwiki-20070527-pages-articles.xml" property="enwiki.expanded"/>
    <available file="${working.dir}/enwiki.txt" property="enwiki.extracted"/>

    <available file="temp/${top.100k.words.archive.filename}"
               property="top.100k.words.archive.present"/>
    <available file="${working.dir}/top100k-out"
               property="top.100k.word.files.expanded"/>
  </target>

  <!-- English Wikipedia dump: download, then decompress, skipping steps already done. -->
  <target name="enwiki-files" depends="check-files">
    <mkdir dir="temp"/>
    <antcall target="get-enwiki"/>
    <antcall target="expand-enwiki"/>
  </target>

  <target name="get-enwiki" unless="enwiki.exists">
    <get src="http://people.apache.org/~gsingers/wikipedia/enwiki-20070527-pages-articles.xml.bz2"
         dest="temp/enwiki-20070527-pages-articles.xml.bz2"/>
  </target>

  <target name="expand-enwiki" unless="enwiki.expanded">
    <bunzip2 src="temp/enwiki-20070527-pages-articles.xml.bz2" dest="temp"/>
  </target>

  <!--
    Guarded by news20.exists, the property check-files sets for
    temp/news20.tar.gz (previously guarded by 20news-18828.exists, which
    describes a different archive).
  -->
  <target name="get-news-20" unless="news20.exists">
    <get src="http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.tar.gz"
         dest="temp/news20.tar.gz"/>
  </target>

  <target name="get-reuters" unless="reuters.exists">
    <get src="http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz"
         dest="temp/reuters21578.tar.gz"/>
  </target>

  <target name="expand-news-20" unless="news20.expanded">
    <gunzip src="temp/news20.tar.gz" dest="temp"/>
    <untar src="temp/news20.tar" dest="${working.dir}"/>
  </target>

  <target name="expand-reuters" unless="reuters.expanded">
    <gunzip src="temp/reuters21578.tar.gz" dest="temp"/>
    <mkdir dir="${working.dir}/reuters"/>
    <untar src="temp/reuters21578.tar" dest="${working.dir}/reuters"/>
    <!-- Remove the top-level *.txt files that come out of the archive. -->
    <delete>
      <fileset dir="${working.dir}/reuters">
        <include name="*.txt"/>
      </fileset>
    </delete>
  </target>

  <!-- Runs ExtractReuters from ${working.dir}/reuters into ${working.dir}/reuters-out. -->
  <target name="extract-reuters" depends="check-files" unless="reuters.extracted">
    <java classname="org.apache.lucene.benchmark.utils.ExtractReuters"
          maxmemory="1024M"
          fork="true">
      <classpath refid="run.classpath"/>
      <arg file="${working.dir}/reuters"/>
      <arg file="${working.dir}/reuters-out"/>
    </java>
  </target>

  <target name="get-20news-18828" unless="20news-18828.exists">
    <get src="http://people.csail.mit.edu/u/j/jrennie/public_html/20Newsgroups/20news-18828.tar.gz"
         dest="temp/20news-18828.tar.gz"/>
  </target>

  <target name="expand-20news-18828" unless="20news-18828.expanded">
    <gunzip src="temp/20news-18828.tar.gz" dest="temp"/>
    <untar src="temp/20news-18828.tar" dest="${working.dir}"/>
  </target>

  <target name="get-mini-news" unless="mini.exists">
    <get src="http://kdd.ics.uci.edu/databases/20newsgroups/mini_newsgroups.tar.gz"
         dest="temp/mini_newsgroups.tar.gz"/>
  </target>

  <target name="expand-mini-news" unless="mini.expanded">
    <gunzip src="temp/mini_newsgroups.tar.gz" dest="temp"/>
    <untar src="temp/mini_newsgroups.tar" dest="${working.dir}"/>
  </target>

  <!-- Name and download location of the top-100k Wikipedia words archive. -->
  <property name="top.100k.words.archive.filename"
            value="top.100k.words.de.en.fr.uk.wikipedia.2009-11.tar.bz2"/>
  <property name="top.100k.words.archive.base.url"
            value="http://people.apache.org/~rmuir/wikipedia"/>

  <target name="get-top-100k-words-archive" unless="top.100k.words.archive.present">
    <mkdir dir="temp"/>
    <get src="${top.100k.words.archive.base.url}/${top.100k.words.archive.filename}"
         dest="temp/${top.100k.words.archive.filename}"/>
  </target>

  <target name="expand-top-100k-word-files" unless="top.100k.word.files.expanded">
    <mkdir dir="${working.dir}/top100k-out"/>
    <untar src="temp/${top.100k.words.archive.filename}"
           overwrite="true"
           compression="bzip2"
           dest="${working.dir}/top100k-out"/>
  </target>

  <target name="top-100k-wiki-word-files" depends="check-files">
    <mkdir dir="${working.dir}"/>
    <antcall target="get-top-100k-words-archive"/>
    <antcall target="expand-top-100k-word-files"/>
  </target>

  <!-- Fetches, expands and extracts the Reuters collection. -->
  <target name="get-files" depends="check-files">
    <mkdir dir="temp"/>
    <antcall target="get-reuters"/>
    <antcall target="expand-reuters"/>
    <antcall target="extract-reuters"/>
  </target>

  <!-- Compile classpath: contrib jars, the base classpath and every jar under lib/. -->
  <path id="classpath">
    <pathelement path="${memory.jar}"/>
    <pathelement path="${highlighter.jar}"/>
    <pathelement path="${analyzers-common.jar}"/>
    <path refid="base.classpath"/>
    <fileset dir="lib">
      <include name="**/*.jar"/>
    </fileset>
  </path>

  <!-- Runtime classpath: compile classpath, compiled classes and optional extras. -->
  <path id="run.classpath">
    <path refid="classpath"/>
    <pathelement location="${build.dir}/classes/java"/>
    <pathelement path="${benchmark.ext.classpath}"/>
  </path>

  <!-- Defaults for run-task. -->
  <property name="task.alg" location="conf/micro-standard.alg"/>
  <property name="task.mem" value="140M"/>
<!-- Benchmark execution targets. -->
  <target name="run-task"
          depends="compile,check-files,get-files"
          description="Run compound penalty perf test (optional: -Dtask.alg=your-algorithm-file -Dtask.mem=java-max-mem)">
    <echo>Working Directory: ${working.dir}</echo>
    <java classname="org.apache.lucene.benchmark.byTask.Benchmark"
          maxmemory="${task.mem}"
          fork="true">
      <classpath refid="run.classpath"/>
      <arg file="${task.alg}"/>
    </java>
  </target>

  <!-- Runs the conf/extractWikipedia.alg algorithm with assertions enabled. -->
  <target name="enwiki" depends="compile,check-files,enwiki-files">
    <echo>Working Directory: ${working.dir}</echo>
    <java classname="org.apache.lucene.benchmark.byTask.Benchmark"
          maxmemory="1024M"
          fork="true">
      <assertions>
        <enable/>
      </assertions>
      <classpath refid="run.classpath"/>
      <arg file="conf/extractWikipedia.alg"/>
    </java>
  </target>

  <!-- Collation benchmark: algorithm file, raw output and JIRA-formatted output. -->
  <property name="collation.alg.file" location="conf/collation.alg"/>
  <property name="collation.output.file"
            value="${working.dir}/collation.benchmark.output.txt"/>
  <property name="collation.jira.output.file"
            value="${working.dir}/collation.bm2jira.output.txt"/>

  <path id="collation.runtime.classpath">
    <path refid="run.classpath"/>
    <pathelement path="${icu.jar}"/>
    <fileset dir="${common.dir}/contrib/icu/lib" includes="icu4j*.jar"/>
  </path>

  <target name="collation" depends="compile,compile-icu,top-100k-wiki-word-files">
    <echo>Running contrib/benchmark with alg file: ${collation.alg.file}</echo>
    <java fork="true"
          classname="org.apache.lucene.benchmark.byTask.Benchmark"
          maxmemory="${task.mem}"
          output="${collation.output.file}">
      <classpath refid="collation.runtime.classpath"/>
      <arg file="${collation.alg.file}"/>
    </java>
    <echo>Benchmark output is in file: ${collation.output.file}</echo>
    <echo>Converting to JIRA table format...</echo>
    <exec executable="perl"
          output="${collation.jira.output.file}"
          failonerror="true">
      <arg value="scripts/collation.bm2jira.pl"/>
      <arg value="${collation.output.file}"/>
    </exec>
    <echo>Benchmark output in JIRA table format is in file: ${collation.jira.output.file}</echo>
  </target>

  <!-- Shingle benchmark: algorithm file, raw output and JIRA-formatted output. -->
  <property name="shingle.alg.file" location="conf/shingle.alg"/>
  <property name="shingle.output.file"
            value="${working.dir}/shingle.benchmark.output.txt"/>
  <property name="shingle.jira.output.file"
            value="${working.dir}/shingle.bm2jira.output.txt"/>

  <path id="shingle.runtime.classpath">
    <path refid="run.classpath"/>
  </path>

  <target name="shingle" depends="compile,get-files">
    <echo>Running contrib/benchmark with alg file: ${shingle.alg.file}</echo>
    <java fork="true"
          classname="org.apache.lucene.benchmark.byTask.Benchmark"
          maxmemory="${task.mem}"
          output="${shingle.output.file}">
      <!-- Same entries as run.classpath; uses the path declared for this target. -->
      <classpath refid="shingle.runtime.classpath"/>
      <arg file="${shingle.alg.file}"/>
    </java>
    <echo>Benchmark output is in file: ${shingle.output.file}</echo>
    <echo>Converting to JIRA table format...</echo>
    <exec executable="perl"
          output="${shingle.jira.output.file}"
          failonerror="true">
      <arg value="scripts/shingle.bm2jira.pl"/>
      <arg value="${shingle.output.file}"/>
    </exec>
    <echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo>
  </target>

  <!-- Build each contrib dependency unless its *.uptodate property is set. -->
  <target name="compile-highlighter" unless="highlighter.uptodate">
    <subant target="default">
      <fileset dir="${common.dir}/contrib/highlighter" includes="build.xml"/>
    </subant>
  </target>

  <target name="compile-icu" unless="icu.uptodate">
    <subant target="default">
      <fileset dir="${common.dir}/contrib/icu" includes="build.xml"/>
    </subant>
  </target>

  <target name="compile-analyzers-common" unless="analyzers-common.uptodate">
    <subant target="default">
      <fileset dir="${common.dir}/contrib/analyzers/common" includes="build.xml"/>
    </subant>
  </target>

  <target name="compile-memory" unless="memory.uptodate">
    <subant target="default">
      <fileset dir="${common.dir}/contrib/memory" includes="build.xml"/>
    </subant>
  </target>

  <target name="init"
          depends="contrib-build.init,compile-memory,compile-highlighter,compile-analyzers-common"/>

  <!--
    Deletes the JavaCC-generated sources of the demo HTML parser, identified by
    the "Generated ... By ... JavaCC" marker in the file content. The fileset
    used to sit directly inside the target with no enclosing task, so nothing
    was ever removed.
  -->
  <target name="clean-javacc">
    <delete>
      <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml"
               includes="*.java">
        <containsregexp expression="Generated.*By.*JavaCC"/>
      </fileset>
    </delete>
  </target>

  <target name="javacc" depends="init,javacc-check" if="javacc.present">
    <invoke-javacc target="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj"
                   outputDir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml"/>
  </target>

  <target name="dist-maven" depends="contrib-build.dist-maven">
    <m2-deploy-with-pom-template pom.xml="lib/lucene-xercesImpl-pom.xml.template"
                                 jar.file="lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar"/>
  </target>
</project>
Other Lucene examples (source code examples)
Here is a short list of links related to this Lucene build.xml source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.