Return-Path: Delivered-To: apmail-jakarta-commons-dev-archive@apache.org Received: (qmail 97220 invoked from network); 28 Apr 2003 22:00:46 -0000 Received: from exchange.sun.com (192.18.33.10) by daedalus.apache.org with SMTP; 28 Apr 2003 22:00:46 -0000 Received: (qmail 28476 invoked by uid 97); 28 Apr 2003 22:02:51 -0000 Delivered-To: qmlist-jakarta-archive-commons-dev@nagoya.betaversion.org Received: (qmail 28469 invoked from network); 28 Apr 2003 22:02:50 -0000 Received: from daedalus.apache.org (HELO apache.org) (208.185.179.12) by nagoya.betaversion.org with SMTP; 28 Apr 2003 22:02:50 -0000 Received: (qmail 96772 invoked by uid 500); 28 Apr 2003 22:00:40 -0000 Mailing-List: contact commons-dev-help@jakarta.apache.org; run by ezmlm Precedence: bulk List-Unsubscribe: List-Subscribe: List-Help: List-Post: List-Id: "Jakarta Commons Developers List" Reply-To: "Jakarta Commons Developers List" Delivered-To: mailing list commons-dev@jakarta.apache.org Received: (qmail 96752 invoked by uid 500); 28 Apr 2003 22:00:40 -0000 Received: (qmail 96749 invoked from network); 28 Apr 2003 22:00:40 -0000 Received: from icarus.apache.org (208.185.179.13) by daedalus.apache.org with SMTP; 28 Apr 2003 22:00:39 -0000 Received: (qmail 58222 invoked by uid 1452); 28 Apr 2003 22:00:39 -0000 Date: 28 Apr 2003 22:00:39 -0000 Message-ID: <20030428220039.58221.qmail@icarus.apache.org> From: juanco@apache.org To: jakarta-commons-sandbox-cvs@apache.org Subject: cvs commit: jakarta-commons-sandbox/jrcs/src/java/org/apache/commons/jrcs/diff Diff.java X-Spam-Rating: daedalus.apache.org 1.6.2 0/1000/N X-Spam-Rating: daedalus.apache.org 1.6.2 0/1000/N juanco 2003/04/28 15:00:39 Modified: jrcs jrcs.jpx jrcs/src/java/org/apache/commons/jrcs/diff Diff.java Log: BUG: indexing was skipping equivalence set for non-repeated items Revision Changes Path 1.3 +8 -1 jakarta-commons-sandbox/jrcs/jrcs.jpx Index: jrcs.jpx =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/jrcs/jrcs.jpx,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- jrcs.jpx 28 Apr 2003 16:47:03 -0000 1.2 +++ jrcs.jpx 28 Apr 2003 22:00:38 -0000 1.3 @@ -9,11 +9,18 @@ - + + + + + + + + 1.5 +137 -16 jakarta-commons-sandbox/jrcs/src/java/org/apache/commons/jrcs/diff/Diff.java Index: Diff.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/jrcs/src/java/org/apache/commons/jrcs/diff/Diff.java,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- Diff.java 28 Apr 2003 17:22:49 -0000 1.4 +++ Diff.java 28 Apr 2003 22:00:38 -0000 1.5 @@ -73,10 +73,10 @@ * Implements a differencing engine that works on arrays of {@link Object Object}. * *

Within this library, the word text means a unit of information - * subject to version control. + * subject to differencing. * *

Text is represented as Object[] because - * the diff engine is capable of handling more than plain ascci. In fact, + * the diff engine is capable of handling more than plain ASCII. In fact, * arrays of any type that implements * {@link java.lang.Object#hashCode hashCode()} and * {@link java.lang.Object#equals equals()} @@ -92,18 +92,44 @@ extends ToString { + /** + * The system line separator is used for standard kinds of output + */ public static final String NL = System.getProperty("line.separator"); + + /** + * For RCS kind of output a plain newline is required. + */ public static final String RCS_EOL = "\n"; + /** + * Mark for non-matching items in source sequences. + */ static final int NOT_FOUND_i = -2; + /** + * Mark for non-matching items in target sequences. They have to + * be different in source and target because they would match + * otherwise. + */ static final int NOT_FOUND_j = -1; + /** + * Marker for End-Of-Sequence. It must be the same for both sequences. + */ static final int EOS = Integer.MAX_VALUE; - Object[] orig; + /** + * The source (original) sequence is stored here. + */ + protected Object[] orig; + + /** + * Constructs a differencer based on an original/source sequence. + * @param o the original sequences. + */ public Diff(Object[] o) { if (o == null) @@ -113,6 +139,15 @@ orig = o; } + /** + * Calculates the differences between two input sequences. + * @param orig The original/source sequence. + * @param rev The target/revised sequence. + * @return The differences as a {@link Revision Revision} object. + * @throws DifferentiationFailedException if the algorithm cannot + * construct the revised sequence out of the original one using the generated + * edit script. + */ public static Revision diff(Object[] orig, Object[] rev) throws DifferentiationFailedException { @@ -124,6 +159,12 @@ return new Diff(orig).diff(rev); } + /** + * Compares two sequences. + * @param orig The original sequence. + * @param rev Revised sequences. + * @return true if the sequences are identical, false otherwise. + */ public static boolean compare(Object[] orig, Object[] rev) { if (orig.length != rev.length) @@ -143,6 +184,14 @@ } } + /** + * Scans a sequence of indexes for a value. + * @param ndx The sequence to scan. + * @param i The starting index. + * @param target The value to scan for. + * @return The index in the sequence of a value less-than or equal to + * the target value. + */ protected int scan(int[] ndx, int i, int target) { while (ndx[i] < target) @@ -152,6 +201,20 @@ return i; } + /** + * Calculates the differences between the sequence given as original/source + * and the given target/revised sequence. + *

+ * This algorithm was designed by + * Juancarlo A�ez way back when + * there weren't any good implementations of diff in Java. + * + * @param rev The target/revised sequence. + * @return The differences as a {@link Revision Revision} object. + * @throws DifferentiationFailedException if the algorithm cannot + * construct the revised sequence out of the original one using the generated + * edit script. + */ public Revision diff(Object[] rev) throws DifferentiationFailedException { @@ -160,22 +223,23 @@ int[] jndx = buildIndex(eqs, rev, NOT_FOUND_j); eqs = null; // let gc know we're done with this - Revision deltas = new Revision(); //!!! new Revision() + Revision deltas = new Revision(); int i = 0; int j = 0; // skip matching for (; indx[i] != EOS && indx[i] == jndx[j]; i++, j++) - {/* void */ + { + /* void */ } - while (indx[i] != jndx[j]) - { // only equal if both == EOS - // they are different + while (indx[i] != jndx[j]) // only equal if both == EOS + { + // indexed items in each sequence are different int ia = i; int ja = j; - // size of this delta + // find the delta do { while (jndx[j] < 0 || jndx[j] < indx[i]) @@ -194,18 +258,25 @@ { --i; --j; - //System.err.println("************* reversing "+ i); } deltas.addDelta(Delta.newDelta(new Chunk(orig, ia, i - ia), new Chunk(rev, ja, j - ja))); + // skip matching for (; indx[i] != EOS && indx[i] == jndx[j]; i++, j++) - {/* void */ + { + /* void */ } } try { + /** @todo This should be removed at some point as it is + * just a verification and it has considerable impact + * on algorithm execution time + */ + + System.out.println(deltas.toString()); if (!compare(deltas.patch(orig), rev)) { throw new DifferentiationFailedException(); @@ -218,7 +289,20 @@ return deltas; } - + /** + * Builds the "equivalence set" for two input sequences. + *

+ * The equivalence set is a map that contains sequence items + * as keys, and a list of source-sequence indexes as values. + *

+ * Only items that appear in both input sequences are mapped, and + * the indexes in the map values correspond to the positions in the + * original/source sequence where the items were found. + * + * @param orig The original/source sequence. + * @param rev The target/revised sequence. + * @return The equivalence set. + */ protected Map buildEqSet(Object[] orig, Object[] rev) { Set items = new HashSet(Arrays.asList(orig)); @@ -241,6 +325,22 @@ return eqs; } + /** + * Indexes a sequence according to a given equivalence set. + *

+ * Items in the input sequene are searched for in the equivalence set. + * For each item, the index with the less absolute distance to the item + * number is chosen from the list in the eauivalence set. + *

+ * Items which do not appear in the equivalence set get an index of + * NF (not found). + * + * @param eqs The equivalence set. + * @param seq The sequence to index. + * @param NF The value to use for sequence items which do not appear + * on the equivalence set. + * @return The index. + */ protected int[] buildIndex(Map eqs, Object[] seq, int NF) { int[] result = new int[seq.length + 1]; @@ -254,13 +354,12 @@ else { Iterator match = matches.iterator(); - Integer value = (Integer) match.next(); - int j = value.intValue(); + int j = ((Integer) match.next()).intValue(); int distance = Math.abs(i - j); + result[i] = j; while (match.hasNext()) { - value = (Integer) match.next(); - j = value.intValue(); + j = ((Integer) match.next()).intValue();; int d = Math.abs(i - j); if (d < distance) { @@ -288,12 +387,23 @@ + /** + * Performs random edits on the input sequence. Useful for testing. + * @param text The input sequence. + * @return The sequence with random edits performed. + */ public static Object[] randomEdit(Object[] text) { return randomEdit(text, text.length); } + /** + * Performs random edits on the input sequence. Useful for testing. + * @param text The input sequence. + * @param seed A seed value for the randomizer. + * @return The sequence with random edits performed. + */ public static Object[] randomEdit(Object[] text, long seed) { List result = new ArrayList(Arrays.asList(text)); @@ -320,12 +430,23 @@ } + /** + * Shuffles around the items in the input sequence. + * @param text The input sequence. + * @return The shuffled sequence. + */ public static Object[] shuffle(Object[] text) { return shuffle(text, text.length); } + /** + * Shuffles around the items in the input sequence. + * @param text The input sequence. + * @param seed A seed value for randomizing the suffle. + * @return The shuffled sequence. + */ public static Object[] shuffle(Object[] text, long seed) { List result = new ArrayList(Arrays.asList(text)); --------------------------------------------------------------------- To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org For additional commands, e-mail: commons-dev-help@jakarta.apache.org