Java example - JBDiff.java

What this is

This file is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" ^TM.
The source code

/*
 * Copyright (c) 2005, Joe Desbonnet, (jdesbonnet@gmail.com)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the <organization> nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY <copyright holder> ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package ie.wombat.jbdiff;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.zip.GZIPOutputStream;

/**
 * Java Binary Diff utility. Based on bsdiff (v4.2) by Colin Percival (see
 * http://www.daemonology.net/bsdiff/ ) and distributed under BSD license.
 * 
 * <p>
 * Running this on large files will probably require an increase of the default
 * maximum heap size (use java -Xmx200m)
 * </p>
 * 
 * @author Joe Desbonnet, joe@galway.net
 * 
 */
public class JBDiff {

	// JBDiff extensions by Stefan.Liebig@compeople.de:
	//
	// - uses GZIP compressor to compress ALL of the blocks (ctrl,diff,extra).
	// - added interfaces that allows using of JBDiff with streams and byte
	// arrays.

	private static final String VERSION = "jbdiff-0.1.0.1";

	// This is ´jbdiff40´.
	private static final byte[] MAGIC_BYTES = new byte[] { 0x6a, 0x62, 0x64,
			0x69, 0x66, 0x66, 0x34, 0x30 };

	private final static void split(int[] I, int[] V, int start, int len, int h) {

		int i, j, k, x, tmp, jj, kk;

		if (len < 16) {
			for (k = start; k < start + len; k += j) {
				j = 1;
				x = V[I[k] + h];
				for (i = 1; k + i < start + len; i++) {
					if (V[I[k + i] + h] < x) {
						x = V[I[k + i] + h];
						j = 0;
					}

					if (V[I[k + i] + h] == x) {
						tmp = I[k + j];
						I[k + j] = I[k + i];
						I[k + i] = tmp;
						j++;
					}

				}

				for (i = 0; i < j; i++) {
					V[I[k + i]] = k + j - 1;
				}
				if (j == 1) {
					I[k] = -1;
				}
			}

			return;
		}

		x = V[I[start + len / 2] + h];
		jj = 0;
		kk = 0;
		for (i = start; i < start + len; i++) {
			if (V[I[i] + h] < x) {
				jj++;
			}
			if (V[I[i] + h] == x) {
				kk++;
			}
		}

		jj += start;
		kk += jj;

		i = start;
		j = 0;
		k = 0;
		while (i < jj) {
			if (V[I[i] + h] < x) {
				i++;
			} else if (V[I[i] + h] == x) {
				tmp = I[i];
				I[i] = I[jj + j];
				I[jj + j] = tmp;
				j++;
			} else {
				tmp = I[i];
				I[i] = I[kk + k];
				I[kk + k] = tmp;
				k++;
			}

		}

		while (jj + j < kk) {
			if (V[I[jj + j] + h] == x) {
				j++;
			} else {
				tmp = I[jj + j];
				I[jj + j] = I[kk + k];
				I[kk + k] = tmp;
				k++;
			}

		}

		if (jj > start) {
			split(I, V, start, jj - start, h);
		}

		for (i = 0; i < kk - jj; i++) {
			V[I[jj + i]] = kk - 1;
		}

		if (jj == kk - 1) {
			I[jj] = -1;
		}

		if (start + len > kk) {
			split(I, V, kk, start + len - kk, h);
		}

	}

	/**
	 * Fast suffix sporting. Larsson and Sadakane's qsufsort algorithm. See
	 * http://www.cs.lth.se/Research/Algorithms/Papers/jesper5.ps
	 * 
	 * @param I
	 * @param V
	 * @param oldBuf
	 * @param oldsize
	 */
	private static void qsufsort(int[] I, int[] V, byte[] oldBuf, int oldsize) {

		// int oldsize = oldBuf.length;
		int[] buckets = new int[256];

		// No need to do that in Java.
		// for ( int i = 0; i < 256; i++ ) {
		// buckets[i] = 0;
		// }

		for (int i = 0; i < oldsize; i++) {
			buckets[oldBuf[i] & 0xff]++;
		}

		for (int i = 1; i < 256; i++) {
			buckets[i] += buckets[i - 1];
		}

		for (int i = 255; i > 0; i--) {
			buckets[i] = buckets[i - 1];
		}

		buckets[0] = 0;

		for (int i = 0; i < oldsize; i++) {
			I[++buckets[oldBuf[i] & 0xff]] = i;
		}

		I[0] = oldsize;
		for (int i = 0; i < oldsize; i++) {
			V[i] = buckets[oldBuf[i] & 0xff];
		}
		V[oldsize] = 0;

		for (int i = 1; i < 256; i++) {
			if (buckets[i] == buckets[i - 1] + 1) {
				I[buckets[i]] = -1;
			}
		}

		I[0] = -1;

		for (int h = 1; I[0] != -(oldsize + 1); h += h) {
			int len = 0;
			int i;
			for (i = 0; i < oldsize + 1;) {
				if (I[i] < 0) {
					len -= I[i];
					i -= I[i];
				} else {
					// if(len) I[i-len]=-len;
					if (len != 0) {
						I[i - len] = -len;
					}
					len = V[I[i]] + 1 - i;
					split(I, V, i, len, h);
					i += len;
					len = 0;
				}

			}

			if (len != 0) {
				I[i - len] = -len;
			}
		}

		for (int i = 0; i < oldsize + 1; i++) {
			I[V[i]] = i;
		}
	}

	/**
	 * Count the number of bytes that match in oldBuf (starting at offset
	 * oldOffset) and newBuf (starting at offset newOffset).
	 * 
	 * @param oldBuf
	 * @param oldOffset
	 * @param newBuf
	 * @param newOffset
	 * @return
	 */
	private final static int matchlen(byte[] oldBuf, int oldSize,
			int oldOffset, byte[] newBuf, int newSize, int newOffset) {
		// int end = Math
		// .min(oldBuf.length - oldOffset, newBuf.length - newOffset);
		int end = Math.min(oldSize - oldOffset, newSize - newOffset);
		for (int i = 0; i < end; i++) {
			if (oldBuf[oldOffset + i] != newBuf[newOffset + i]) {
				return i;
			}
		}
		return end;
	}

	private final static int search(int[] I, byte[] oldBuf, int oldSize,
			byte[] newBuf, int newSize, int newBufOffset, int start, int end,
			IntByRef pos) {

		if (end - start < 2) {
			int x = matchlen(oldBuf, oldSize, I[start], newBuf, newSize,
					newBufOffset);
			int y = matchlen(oldBuf, oldSize, I[end], newBuf, newSize,
					newBufOffset);

			if (x > y) {
				pos.value = I[start];
				return x;
			} else {
				pos.value = I[end];
				return y;
			}
		}

		int x = start + (end - start) / 2;
		if (Util.memcmp(oldBuf, oldSize, I[x], newBuf, newSize, newBufOffset) < 0) {
			return search(I, oldBuf, oldSize, newBuf, newSize, newBufOffset, x,
					end, pos);
		} else {
			return search(I, oldBuf, oldSize, newBuf, newSize, newBufOffset,
					start, x, pos);
		}

	}

	/**
	 * @param oldFile
	 * @param newFile
	 * @param diffFile
	 * @throws IOException
	 */
	public static void bsdiff(File oldFile, File newFile, File diffFile)
			throws IOException {
		InputStream oldInputStream = new BufferedInputStream(
				new FileInputStream(oldFile));
		InputStream newInputStream = new BufferedInputStream(
				new FileInputStream(newFile));
		OutputStream diffOutputStream = new FileOutputStream(diffFile);

		byte[] diffBytes = bsdiff(oldInputStream, (int) oldFile.length(),
				newInputStream, (int) newFile.length());

		diffOutputStream.write(diffBytes);
		diffOutputStream.close();
	}

	/**
	 * @param oldInputStream
	 * @param oldsize
	 * @param newInputStream
	 * @param newsize
	 * @return
	 * @throws IOException
	 */
	public static byte[] bsdiff(InputStream oldInputStream, int oldsize,
			InputStream newInputStream, int newsize) throws IOException {

		byte[] oldBuf = new byte[oldsize];

		Util.readFromStream(oldInputStream, oldBuf, 0, oldsize);
		oldInputStream.close();

		byte[] newBuf = new byte[newsize];
		Util.readFromStream(newInputStream, newBuf, 0, newsize);
		newInputStream.close();

		return bsdiff(oldBuf, oldsize, newBuf, newsize);
	}

	/**
	 * @param oldBuf
	 * @param oldsize
	 * @param newBuf
	 * @param newsize
	 * @return
	 * @throws IOException
	 */
	public static byte[] bsdiff(byte[] oldBuf, int oldsize, byte[] newBuf,
			int newsize) throws IOException {

		int[] I = new int[oldsize + 1];
		qsufsort(I, new int[oldsize + 1], oldBuf, oldsize);

		// diff block
		int dblen = 0;
		byte[] db = new byte[newsize];

		// extra block
		int eblen = 0;
		byte[] eb = new byte[newsize];

		/*
		 * Diff file is composed as follows:
		 * 
		 * Header (32 bytes) Data (from offset 32 to end of file)
		 * 
		 * Header: Offset 0, length 8 bytes: file magic "jbdiff40" Offset 8,
		 * length 8 bytes: length of compressed ctrl block Offset 16, length 8
		 * bytes: length of compressed diff block Offset 24, length 8 bytes:
		 * length of new file
		 * 
		 * Data: 32 (length ctrlBlockLen): ctrlBlock (bzip2) 32+ctrlBlockLen
		 * (length diffBlockLen): diffBlock (bzip2) 32+ctrlBlockLen+diffBlockLen
		 * (to end of file): extraBlock (bzip2)
		 * 
		 * ctrlBlock comprises a set of records, each record 12 bytes. A record
		 * comprises 3 x 32 bit integers. The ctrlBlock is not compressed.
		 */

		ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
		DataOutputStream diffOut = new DataOutputStream(byteOut);

		/*
		 * Write as much of header as we have now. Size of ctrlBlock and
		 * diffBlock must be filled in later.
		 */
		diffOut.write(MAGIC_BYTES);
		diffOut.writeLong(-1); // place holder for ctrlBlockLen
		diffOut.writeLong(-1); // place holder for diffBlockLen
		diffOut.writeLong(newsize);
		diffOut.flush();

		GZIPOutputStream bzip2Out = new GZIPOutputStream(diffOut);
		DataOutputStream dataOut = new DataOutputStream(bzip2Out);

		int oldscore, scsc;

		int overlap, Ss, lens;
		int i;
		int scan = 0;
		int len = 0;
		int lastscan = 0;
		int lastpos = 0;
		int lastoffset = 0;

		IntByRef pos = new IntByRef();
		// int ctrlBlockLen = 0;

		while (scan < newsize) {
			oldscore = 0;

			for (scsc = scan += len; scan < newsize; scan++) {

				len = search(I, oldBuf, oldsize, newBuf, newsize, scan, 0,
						oldsize, pos);

				for (; scsc < scan + len; scsc++) {
					if ((scsc + lastoffset < oldsize)
							&& (oldBuf[scsc + lastoffset] == newBuf[scsc])) {
						oldscore++;
					}
				}

				if (((len == oldscore) && (len != 0)) || (len > oldscore + 8)) {
					break;
				}

				if ((scan + lastoffset < oldsize)
						&& (oldBuf[scan + lastoffset] == newBuf[scan])) {
					oldscore--;
				}
			}

			if ((len != oldscore) || (scan == newsize)) {
				int s = 0;
				int Sf = 0;
				int lenf = 0;
				for (i = 0; (lastscan + i < scan) && (lastpos + i < oldsize);) {
					if (oldBuf[lastpos + i] == newBuf[lastscan + i])
						s++;
					i++;
					if (s * 2 - i > Sf * 2 - lenf) {
						Sf = s;
						lenf = i;
					}
				}

				int lenb = 0;
				if (scan < newsize) {
					s = 0;
					int Sb = 0;
					for (i = 1; (scan >= lastscan + i) && (pos.value >= i); i++) {
						if (oldBuf[pos.value - i] == newBuf[scan - i])
							s++;
						if (s * 2 - i > Sb * 2 - lenb) {
							Sb = s;
							lenb = i;
						}
					}
				}

				if (lastscan + lenf > scan - lenb) {
					overlap = (lastscan + lenf) - (scan - lenb);
					s = 0;
					Ss = 0;
					lens = 0;
					for (i = 0; i < overlap; i++) {
						if (newBuf[lastscan + lenf - overlap + i] == oldBuf[lastpos
								+ lenf - overlap + i]) {
							s++;
						}
						if (newBuf[scan - lenb + i] == oldBuf[pos.value - lenb
								+ i]) {
							s--;
						}
						if (s > Ss) {
							Ss = s;
							lens = i + 1;
						}
					}

					lenf += lens - overlap;
					lenb -= lens;
				}

				// ? byte casting introduced here -- might affect things
				for (i = 0; i < lenf; i++) {
					db[dblen + i] = (byte) (newBuf[lastscan + i] - oldBuf[lastpos
							+ i]);
				}

				for (i = 0; i < (scan - lenb) - (lastscan + lenf); i++) {
					eb[eblen + i] = newBuf[lastscan + lenf + i];
				}

				dblen += lenf;
				eblen += (scan - lenb) - (lastscan + lenf);

				/*
				 * Write control block entry (3 x int)
				 */
				// diffOut.writeInt( lenf );
				// diffOut.writeInt( ( scan - lenb ) - ( lastscan + lenf ) );
				// diffOut.writeInt( ( pos[0] - lenb ) - ( lastpos + lenf ) );
				// ctrlBlockLen += 12;
				dataOut.writeInt(lenf);
				dataOut.writeInt((scan - lenb) - (lastscan + lenf));
				dataOut.writeInt((pos.value - lenb) - (lastpos + lenf));

				lastscan = scan - lenb;
				lastpos = pos.value - lenb;
				lastoffset = pos.value - scan;
			} // end if
		} // end while loop

		dataOut.flush();
		bzip2Out.finish();

		// now compressed ctrlBlockLen
		int ctrlBlockLen = diffOut.size() - Util.HEADER_SIZE;
		// System.err.println( "Diff: ctrlBlockLen=" + ctrlBlockLen );

		// GZIPOutputStream gzOut;

		/*
		 * Write diff block
		 */
		// gzOut = new GZIPOutputStream( diffOut );
		bzip2Out = new GZIPOutputStream(diffOut);
		bzip2Out.write(db, 0, dblen);
		bzip2Out.finish();
		bzip2Out.flush();
		int diffBlockLen = diffOut.size() - ctrlBlockLen - Util.HEADER_SIZE;
		// System.err.println( "Diff: diffBlockLen=" + diffBlockLen );

		/*
		 * Write extra block
		 */
		// gzOut = new GZIPOutputStream( diffOut );
		bzip2Out = new GZIPOutputStream(diffOut);
		bzip2Out.write(eb, 0, eblen);
		bzip2Out.finish();
		bzip2Out.flush();
		// long extraBlockLen = diffOut.size() - diffBlockLen - ctrlBlockLen -
		// HEADER_SIZE;
		// System.err.println( "Diff: extraBlockLen=" + extraBlockLen );

		diffOut.close();

		/*
		 * Write missing header info.
		 */
		ByteArrayOutputStream byteHeaderOut = new ByteArrayOutputStream(
				Util.HEADER_SIZE);
		DataOutputStream headerOut = new DataOutputStream(byteHeaderOut);
		headerOut.write(MAGIC_BYTES);
		headerOut.writeLong(ctrlBlockLen); // place holder for ctrlBlockLen
		headerOut.writeLong(diffBlockLen); // place holder for diffBlockLen
		headerOut.writeLong(newsize);
		headerOut.close();

		// Copy header information into the diff
		byte[] diffBytes = byteOut.toByteArray();
		byte[] headerBytes = byteHeaderOut.toByteArray();

		System.arraycopy(headerBytes, 0, diffBytes, 0, headerBytes.length);

		return diffBytes;
		// /*
		// * Write missing header info. Need to reopen the file with
		// RandomAccessFile
		// * for this.
		// */
		// RandomAccessFile diff = new RandomAccessFile( diffFile, "rw" );
		// diff.seek( 8 );
		// diff.writeLong( ctrlBlockLen ); // ctrlBlockLen (compressed) @offset
		// 8
		// diff.writeLong( diffBlockLen ); // diffBlockLen (compressed) @offset
		// 16
		// diff.close();
	}

	/**
	 * Run JBDiff from the command line. Params: oldfile newfile difffile. diff
	 * file will be created.
	 * 
	 * @param arg
	 * @throws IOException
	 */
	public static void main(String[] arg) throws IOException {

		if (arg.length != 3) {
			System.err
					.println("usage example: java -Xmx200m ie.wombat.jbdiff.JBDiff oldfile newfile patchfile\n");
			return;
		}

		File oldFile = new File(arg[0]);
		File newFile = new File(arg[1]);
		File diffFile = new File(arg[2]);

		bsdiff(oldFile, newFile, diffFile);

	}

	private static class IntByRef {
		private int value;
	}
}
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.
What this is

Other links

The source code

new blog posts

... this post is sponsored by my books ...
#1 New Release!	FP Best Seller