|
Lucene example source code file (changes2html.pl)
This example Lucene source code file (changes2html.pl) is included in the DevDaily.com "Java Source Code Warehouse" project.
The intent of this project is to help you "Learn Java by Example"™.
The Lucene changes2html.pl source code
#!/usr/bin/perl
#
# Transforms Lucene Java's CHANGES.txt into Changes.html
#
# Input is on STDIN, output is to STDOUT
#
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
use strict;
use warnings;
# URL constants. Only $jira_url_prefix is referenced in the part of the
# script that follows directly; the other two are presumably used by helper
# subs defined later in the file -- TODO confirm.
my $project_info_url = 'https://issues.apache.org/jira/rest/api/2.0.alpha1/project/LUCENE';
my $jira_url_prefix = 'http://issues.apache.org/jira/browse/';
my $bugzilla_url_prefix = 'http://issues.apache.org/bugzilla/show_bug.cgi?id=';

# Lookup data built by setup_* subs defined elsewhere in this file.
# They are called with explicit parentheses rather than as "&name;", which
# would implicitly hand the current @_ to the callee and bypass prototypes.
my %release_dates     = setup_release_dates();
my $month_regex       = setup_month_regex();
my %month_nums        = setup_month_nums();
my %bugzilla_jira_map = setup_bugzilla_jira_map();

# Parser state, filled in while the input is scanned
my $title;          # First non-blank, non-$Id$ line of the input
my $release;        # Name of the release currently being parsed
my $reldate;        # Its date, as returned by get_release_date()
my $relinfo;        # Remaining free text from the release heading
my $sections;       # Array ref of [ heading, items ] for the current release
my $items;          # Array ref: list type followed by the item texts
my $first_relid;    # Element id of the first release heading seen
my $second_relid;   # Element id of the second release heading seen
my @releases = ();  # One [ release, date, info, sections ] entry per release

my @lines = <>;     # Get all input at once
#
# Parse input and build hierarchical release structure in @releases
#
# Resulting shape:
#   @releases : list of [ $release, $reldate, $relinfo, $sections ]
#   $sections : array ref of [ $heading, $items ]; $heading is undef for the
#               headless section created for releases that have no sections
#   $items    : array ref whose 0th element is the list type ('numbered',
#               'paragraph', or a bullet string), followed by the item texts
#
for (my $line_num = 0 ; $line_num <= $#lines ; ++$line_num) {
    $_ = $lines[$line_num];
    next unless (/\S/);                  # Skip blank lines
    next if (/^\s*\$Id(?::.*)?\$/);      # Skip $Id$ lines

    # The first line that survives the two filters above is the title
    unless ($title) {
        s/^\s+//;                        # Trim leading whitespace
        s/\s+$//;                        # Trim trailing whitespace
        s/^[^Ll]*//;                     # Trim leading BOM characters if exists
        $title = $_;
        next;
    }

    if (/\s*===+\s*(.*?)\s*===+\s*/) {   # New-style release headings
        my $heading_text = $1;
        $heading_text =~ s/^(?:release|lucene)\s*//i; # Trim "Release " or "Lucene " prefix
        if ($heading_text =~ /^(\d+(?:\.(?:\d+|[xyz]))*|Trunk)\s*(.*)/i) {
            ($release, $relinfo) = ($1, $2);
        } else {
            # Not a recognizable version: keep the heading text as the release
            # name instead of carrying undef values through the rest of the run
            ($release, $relinfo) = ($heading_text, '');
        }
        $relinfo =~ s/\s*:\s*$//;        # Trim trailing colon
        $relinfo =~ s/^\s*,\s*//;        # Trim leading comma
        ($reldate, $relinfo) = get_release_date($release, $relinfo);
        $sections = [];
        push @releases, [ $release, $reldate, $relinfo, $sections ];
        # Remember the element ids of the first two releases; the generated
        # JavaScript leaves those two expanded
        ($first_relid = lc($release)) =~ s/\s+/_/g if ($#releases == 0);
        ($second_relid = lc($release)) =~ s/\s+/_/g if ($#releases == 1);
        $items = undef;
        next;
    }

    if (/^\s*([01](?:\.[0-9]{1,2}){1,2}[a-z]?(?:\s*(?:RC\d+|final))?)\s*
         ((?:200[0-7]-.*|.*,.*200[0-7].*)?)$/x) { # Old-style release heading
        $release = $1;
        $relinfo = $2;
        $relinfo =~ s/\s*:\s*$//;        # Trim trailing colon
        $relinfo =~ s/^\s*,\s*//;        # Trim leading comma
        ($reldate, $relinfo) = get_release_date($release, $relinfo);
        $sections = [];
        push @releases, [ $release, $reldate, $relinfo, $sections ];
        $items = undef;
        next;
    }

    # Section heading: no leading whitespace, initial word capitalized,
    # five words or less, and no trailing punctuation
    if (/^([A-Z]\S*(?:\s+\S+){0,4})(?<![-.:;!()])\s*$/) {
        my $heading = $1;
        $items = [];
        push @$sections, [ $heading, $items ];
        next;
    }

    # Handle earlier releases without sections - create a headless section
    unless ($items) {
        $items = [];
        push @$sections, [ undef, $items ];
    }

    my $type;
    if (@$items) { # A list item has been encountered in this section before
        $type = $items->[0];  # 0th position of items array is list type
    } else {
        $type = get_list_type($_);
        push @$items, $type;
    }

    # Per list type: how to strip the item's leading marker, and how to
    # recognize the line that ends the item.
    my ($marker_regex, $boundary_regex);
    if ($type eq 'numbered') {           # The modern items list style
        # List item boundary is another numbered item or an unindented line
        $marker_regex   = qr/^(\s{0,2}\d+\.\d?\s*)/;
        $boundary_regex = qr/^(?:\s{0,2}\d+\.\s*\S|\S)/;
    } elsif ($type eq 'paragraph') {     # List item boundary is a blank line
        $marker_regex   = qr/^(\s+)/;
        $boundary_regex = qr/\A\s*\z/;
    } else {                             # $type is one of the bulleted types
        # List item boundary is an unindented line or another bullet
        $marker_regex   = qr/^(\s*\Q$type\E\s*)/;
        $boundary_regex = qr/^(?:\S|\s*\Q$type\E)/;
    }

    my $item = $_;
    # Only trust $1 when the marker actually matched on this line
    my $leading_ws_width = ($item =~ s/$marker_regex//) ? length($1) : 0;
    $item =~ s/\s+$//;                   # Trim trailing whitespace
    $item .= "\n";

    # Peek at the next line and consume it only if it continues this item.
    # The boundary line is therefore never swallowed, so an item that starts
    # on the very last input line is still seen by the outer loop.
    while ($line_num < $#lines
           and $lines[$line_num + 1] !~ $boundary_regex) {
        my $line = $lines[++$line_num];
        $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
        $line =~ s/\s+$//;                   # Trim trailing whitespace
        $item .= "$line\n";
    }

    # Only numbered items can absorb blank lines that need trimming here
    $item =~ s/\n+\Z/\n/ if ($type eq 'numbered'); # Trim trailing blank lines
    push @$items, $item;
}
# Recognize IDs of top level nodes of the most recent two releases,
# escaping JavaScript regex metacharacters, e.g.: "^(?:trunk|2\\\\.4\\\\.0)"

# Return a copy of a release id with each regex metacharacter prefixed by
# four backslash characters. '$' and '/' are written as \$ and \/ inside the
# character class because a bare "$/" in a pattern is interpolated as the
# input record separator variable rather than read as two literal characters.
sub _escape_js_regex_meta {
    my ($relid) = @_;
    $relid =~ s!([.+*?{}()|^\$\/\[\]\\])!\\\\\\\\$1!g;
    return $relid;
}

# A release id is undefined when fewer than two new-style release headings
# were parsed; use an empty alternative in that case instead of interpolating
# undef (same resulting text, no "uninitialized value" warnings).
my $first_relid_regex
    = defined($first_relid) ? _escape_js_regex_meta($first_relid) : '';
my $second_relid_regex
    = defined($second_relid) ? _escape_js_regex_meta($second_relid) : '';
my $newer_version_regex = "^(?:$first_relid_regex|$second_relid_regex)";
#
# Print HTML-ified version to STDOUT
#
# The header is a single interpolating heredoc: $title, $newer_version_regex,
# $first_relid and $second_relid are substituted into the markup and the
# embedded JavaScript, and backslashes meant for the JavaScript are doubled.
# The <title>, <h1> and buttons <div> elements are explicitly closed so the
# rest of the page is not swallowed by them, and toggleList() declares its
# locals with "var" instead of creating implicit globals.
#
print<<"__HTML_HEADER__";
<!--
**********************************************************
** WARNING: This file is generated from CHANGES.txt by the
** Perl script 'changes2html.pl'.
** Do *not* edit this file!
**********************************************************
****************************************************************************
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
****************************************************************************
-->
<html>
<head>
<title>$title</title>
<link rel="stylesheet" href="ChangesFancyStyle.css" title="Fancy">
<link rel="alternate stylesheet" href="ChangesSimpleStyle.css" title="Simple">
<link rel="alternate stylesheet" href="ChangesFixedWidthStyle.css" title="Fixed Width">
<META http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<SCRIPT>
function toggleList(id) {
var listStyle = document.getElementById(id + '.list').style;
var anchor = document.getElementById(id);
if (listStyle.display == 'none') {
listStyle.display = 'block';
anchor.title = 'Click to collapse';
location.href = '#' + id;
} else {
listStyle.display = 'none';
anchor.title = 'Click to expand';
}
var expandButton = document.getElementById('expand.button');
expandButton.disabled = false;
var collapseButton = document.getElementById('collapse.button');
collapseButton.disabled = false;
}
function collapseAll() {
var unorderedLists = document.getElementsByTagName("ul");
for (var i = 0; i < unorderedLists.length; i++) {
if (unorderedLists[i].className != 'bulleted-list')
unorderedLists[i].style.display = "none";
else
unorderedLists[i].style.display = "block";
}
var orderedLists = document.getElementsByTagName("ol");
for (var i = 0; i < orderedLists.length; i++)
orderedLists[i].style.display = "none";
var anchors = document.getElementsByTagName("a");
for (var i = 0 ; i < anchors.length; i++) {
if (anchors[i].id != '')
anchors[i].title = 'Click to expand';
}
var collapseButton = document.getElementById('collapse.button');
collapseButton.disabled = true;
var expandButton = document.getElementById('expand.button');
expandButton.disabled = false;
}
function expandAll() {
var unorderedLists = document.getElementsByTagName("ul");
for (var i = 0; i < unorderedLists.length; i++)
unorderedLists[i].style.display = "block";
var orderedLists = document.getElementsByTagName("ol");
for (var i = 0; i < orderedLists.length; i++)
orderedLists[i].style.display = "block";
var anchors = document.getElementsByTagName("a");
for (var i = 0 ; i < anchors.length; i++) {
if (anchors[i].id != '')
anchors[i].title = 'Click to collapse';
}
var expandButton = document.getElementById('expand.button');
expandButton.disabled = true;
var collapseButton = document.getElementById('collapse.button');
collapseButton.disabled = false;
}
var newerRegex = new RegExp("$newer_version_regex");
function isOlder(listId) {
return ! newerRegex.test(listId);
}
function escapeMeta(s) {
return s.replace(/(?=[.*+?^\${}()|[\\]\\/\\\\])/g, '\\\\');
}
function shouldExpand(currentList, currentAnchor, listId) {
var listName = listId.substring(0, listId.length - 5);
var parentRegex = new RegExp("^" + escapeMeta(listName) + "\\\\.");
return currentList == listId
|| (isOlder(currentAnchor) && listId == 'older.list')
|| parentRegex.test(currentAnchor);
}
function collapse() {
/* Collapse all but the first and second releases. */
var unorderedLists = document.getElementsByTagName("ul");
var currentAnchor = location.hash.substring(1);
var currentList = currentAnchor + ".list";
for (var i = 0; i < unorderedLists.length; i++) {
var list = unorderedLists[i];
/* Collapse the current item, unless either the current item is one of
* the first two releases, or the current URL has a fragment and the
* fragment refers to the current item or one of its ancestors.
*/
if (list.id != '$first_relid.list'
&& list.id != '$second_relid.list'
&& list.className != 'bulleted-list'
&& (currentAnchor == ''
|| ! shouldExpand(currentList, currentAnchor, list.id))) {
list.style.display = "none";
}
}
var orderedLists = document.getElementsByTagName("ol");
for (var i = 0; i < orderedLists.length; i++) {
var list = orderedLists[i];
/* Collapse the current item, unless the current URL has a fragment
* and the fragment refers to the current item or one of its ancestors.
*/
if (currentAnchor == ''
|| ! shouldExpand(currentList, currentAnchor, list.id)) {
list.style.display = "none";
}
}
/* Add "Click to collapse/expand" tooltips to the release/section headings */
var anchors = document.getElementsByTagName("a");
for (var i = 0 ; i < anchors.length; i++) {
var anchor = anchors[i];
if (anchor.id != '') {
if (anchor.id == '$first_relid' || anchor.id == '$second_relid') {
anchor.title = 'Click to collapse';
} else {
anchor.title = 'Click to expand';
}
}
}
/* Insert "Expand All" and "Collapse All" buttons */
var buttonsParent = document.getElementById('buttons.parent');
var expandButton = document.createElement('button');
expandButton.appendChild(document.createTextNode('Expand All'));
expandButton.onclick = function() { expandAll(); }
expandButton.id = 'expand.button';
buttonsParent.appendChild(expandButton);
var collapseButton = document.createElement('button');
collapseButton.appendChild(document.createTextNode('Collapse All'));
collapseButton.onclick = function() { collapseAll(); }
collapseButton.id = 'collapse.button';
buttonsParent.appendChild(collapseButton);
}
window.onload = collapse;
</SCRIPT>
</head>
<body>
<h1>$title</h1>
<div id="buttons.parent"></div>
__HTML_HEADER__
#
# Emit the HTML body: one heading per release, and for each release its
# sections and their items.
#
# NOTE(review): this span appears to have been damaged when the file was
# extracted from a web page -- the string opened by the statement
# `print " <li> tags intact and add` is never closed, and $item and $list
# are used without any visible declaration. Restore it from the upstream
# changes2html.pl before relying on it; the code below is left untouched.
#
my $heading;
my $relcnt = 0;
my $header = 'h2';
for my $rel (@releases) {
# On reaching the third release, switch later release headings to h3 and
# open the "Older Releases" list (element id "older.list")
if (++$relcnt == 3) {
$header = 'h3';
print "<h2>";
print "Older Releases";
print "</a>\n";
print "<ul id=\"older.list\">\n"
}
# Unpack the [ release, date, info, sections ] entry built by the parser
($release, $reldate, $relinfo, $sections) = @$rel;
# The first section heading is undefined for the older sectionless releases
my $has_release_sections = has_release_sections($sections);
# Element id: lowercased release name, whitespace runs replaced by "_"
(my $relid = lc($release)) =~ s/\s+/_/g;
print "<$header>";
print "Release " unless ($release =~ /^trunk$/i);
print "$release $relinfo";
print " [$reldate]" unless ($reldate eq 'unknown');
print "</a>$header>\n";
print "<ul id=\"$relid.list\">\n"
if ($has_release_sections);
for my $section (@$sections) {
($heading, $items) = @$section;
# NOTE(review): $heading is undef for headless sections, so lc() here
# presumably triggers an "uninitialized value" warning -- confirm
(my $sectid = lc($heading)) =~ s/\s+/_/g;
# $items->[0] holds the list type, so $#{$items} is the count of real items
my $numItemsStr = $#{$items} > 0 ? "($#{$items})" : "(none)";
print " <li> tags intact and add
$item =~ s:(?<!code)>:>:gi; # wrappers for non-inline sections
$item =~ s{((?:^|.*\n)\s*)<code>(?!.+)(.+)(?![ \t]*\S)}
{
my $prefix = $1;
my $code = $2;
$code =~ s/\s+$//;
"$prefix<code>$code "
}gise;
# Put attributions on their own lines.
# Check for trailing parenthesized attribution with no following period.
# Exclude things like "(see #3 above)" and "(use the bug number instead of xxxx)"
unless ($item =~ s:\s*(\((?!see #|use the bug number)[^()"]+?\))\s*$:\n<br />$1:) {
# If attribution is not found, then look for attribution with a
# trailing period, but try not to include trailing parenthesized things
# that are not attributions.
#
# Rule of thumb: if a trailing parenthesized expression with a following
# period does not contain "LUCENE-XXX", and it either has three or
# fewer words or it includes the word "via" or the phrase "updates from",
# then it is considered to be an attribution.
$item =~ s{(\s*(\((?!see \#|use the bug number)[^()"]+?\)))
((?:\.|(?i:\.?\s*Issue\s+\d{3,}|LUCENE-\d+)\.?)\s*)$}
{
my $subst = $1; # default: no change
my $parenthetical = $2;
my $trailing_period_and_or_issue = $3;
if ($parenthetical !~ /LUCENE-\d+/) {
my ($no_parens) = $parenthetical =~ /^\((.*)\)$/s;
my @words = grep {/\S/} split /\s+/, $no_parens;
if ($no_parens =~ /\b(?:via|updates\s+from)\b/i || scalar(@words) <= 3) {
$subst = "\n<br />$parenthetical";
}
}
$subst . $trailing_period_and_or_issue;
}ex;
}
$item =~ s{(.*?)(<code>.*? )|(.*)}
{
my $uncode = undef;
if (defined($2)) {
$uncode = $1 || '';
$uncode =~ s{((?<=\n)[ ]*-.*\n(?:.*\n)*)}
{
my $bulleted_list = $1;
$bulleted_list
=~ s{(?:(?<=\n)|\A)[ ]*-[ ]*(.*(?:\n|\z)(?:[ ]+[^ -].*(?:\n|\z))*)}
{<li class="bulleted-list">\n$1\n}g;
$bulleted_list
=~ s!(<li.*\n$1\n!s;
$bulleted_list;
}ge;
"$uncode$2";
} else {
$uncode = $3 || '';
$uncode =~ s{((?<=\n)[ ]*-.*\n(?:.*\n)*)}
{
my $bulleted_list = $1;
$bulleted_list
=~ s{(?:(?<=\n)|\A)[ ]*-[ ]*(.*(?:\n|\z)(?:[ ]+[^ -].*(?:\n|\z))*)}
{<li class="bulleted-list">\n$1\n}g;
$bulleted_list
=~ s!(<li.*\n$1\n!s;
$bulleted_list;
}ge;
$uncode;
}
}sge;
$item =~ s:\n{2,}:\n<p/>\n:g; # Keep paragraph breaks
# Link LUCENE-XXX, SOLR-XXX and INFRA-XXX to JIRA
$item =~ s{(?:${jira_url_prefix})?((?:LUCENE|SOLR|INFRA)-\d+)}
{<a href="${jira_url_prefix}$1">$1 }g;
$item =~ s{(issue\s*\#?\s*(\d{3,}))} # Link Issue XXX to JIRA
{<a href="${jira_url_prefix}LUCENE-$2">$1}gi;
# Link Lucene XXX, SOLR XXX and INFRA XXX to JIRA
$item =~ s{((LUCENE|SOLR|INFRA)\s+(\d{3,}))}
{<a href="${jira_url_prefix}\U$2\E-$3">$1}gi;
# Find single Bugzilla issues
$item =~ s~((?i:bug|patch|issue)\s*\#?\s*(\d+))
~ my $issue = $1;
my $jira_issue_num = $bugzilla_jira_map{$2}; # Link to JIRA copies
$issue = qq!<a href="${jira_url_prefix}LUCENE-$jira_issue_num">!
. qq!$issue [LUCENE-$jira_issue_num]</a>!
if (defined($jira_issue_num));
$issue;
~gex;
# Find multiple Bugzilla issues
$item =~ s~(?<=(?i:bugs))(\s*)(\d+)(\s*(?i:\&|and)\s*)(\d+)
~ my $leading_whitespace = $1;
my $issue_num_1 = $2;
my $interlude = $3;
my $issue_num_2 = $4;
# Link to JIRA copies
my $jira_issue_1 = $bugzilla_jira_map{$issue_num_1};
my $issue1
= qq!<a href="${jira_url_prefix}LUCENE-$jira_issue_1">!
. qq!$issue_num_1 [LUCENE-$jira_issue_1]</a>!
if (defined($jira_issue_1));
my $jira_issue_2 = $bugzilla_jira_map{$issue_num_2};
my $issue2
= qq!<a href="${jira_url_prefix}LUCENE-$jira_issue_2">!
. qq!$issue_num_2 [LUCENE-$jira_issue_2]</a>!
if (defined($jira_issue_2));
$leading_whitespace . $issue1 . $interlude . $issue2;
~gex;
print " <li>$item\n";
}
print " </$list>\n" unless ($has_release_sections and not $heading);
print " </li>\n" if ($has_release_sections);
}
print "</ul>\n" if ($has_release_sections);
}
# Close the "Older Releases" list. That list is opened as soon as the third
# release is reached, so it must also be closed when the input contains
# exactly three releases, not only when it contains more than three.
print "</ul>\n" if ($relcnt >= 3);
print "</body>\n |