commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jua...@apache.org
Subject cvs commit: jakarta-commons-sandbox/jrcs/src/test/org/apache/commons/jrcs/diff DiffTest.java
Date Sat, 10 May 2003 18:55:11 GMT
juanco      2003/05/10 11:55:11

  Modified:    jrcs/src/java/org/apache/commons/jrcs/diff Diff.java
                        DiffAlgorithm.java package.html
               jrcs/src/java/org/apache/commons/jrcs/diff/myers
                        MyersDiff.java PathNode.java
               jrcs/src/test/org/apache/commons/jrcs AllTests.java
               jrcs/src/test/org/apache/commons/jrcs/diff DiffTest.java
  Log:
  Added memory optimizations to Myers' diff algorithm. Now there are no out-of-memory errors
for up to 8k of shuffled, or totally different items.
  
  Revision  Changes    Path
  1.14      +24 -2     jakarta-commons-sandbox/jrcs/src/java/org/apache/commons/jrcs/diff/Diff.java
  
  Index: Diff.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/jrcs/src/java/org/apache/commons/jrcs/diff/Diff.java,v
  retrieving revision 1.13
  retrieving revision 1.14
  diff -u -r1.13 -r1.14
  --- Diff.java	7 May 2003 06:34:00 -0000	1.13
  +++ Diff.java	10 May 2003 18:55:10 -0000	1.14
  @@ -144,7 +144,7 @@
   
       protected DiffAlgorithm defaultAlgorithm()
       {
  -        return new SimpleDiff();
  +        return new MyersDiff();
       }
   
       /**
  @@ -197,6 +197,12 @@
           return algorithm.diff(orig, rev);
       }
   
  +    /**
  +     * Compares the two input sequences.
  +     * @param orig The original sequence.
  +     * @param rev The revised sequence.
  +     * @return true if the sequences are identical. False otherwise.
  +     */
       public static boolean compare(Object[] orig, Object[] rev)
       {
           if (orig.length != rev.length)
  @@ -225,6 +231,22 @@
       public static String arrayToString(Object[] o)
       {
           return arrayToString(o, Diff.NL);
  +    }
  +
  +    /**
  +     * Edits all of the items in the input sequence.
  +     * @param text The input sequence.
  +     * @return A sequence of the same length with all the lines
  +     * differing from the corresponding ones in the input.
  +     */
  +    public static Object[] editAll(Object[] text)
  +    {
  +        Object[] result = new String[text.length];
  +
  +        for(int i = 0; i < text.length; i++)
  +            result[i] = text[i] + " <edited>";
  +
  +        return result;
       }
   
       /**
  
  
  
  1.5       +2 -1      jakarta-commons-sandbox/jrcs/src/java/org/apache/commons/jrcs/diff/DiffAlgorithm.java
  
  Index: DiffAlgorithm.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/jrcs/src/java/org/apache/commons/jrcs/diff/DiffAlgorithm.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- DiffAlgorithm.java	6 May 2003 18:07:51 -0000	1.4
  +++ DiffAlgorithm.java	10 May 2003 18:55:10 -0000	1.5
  @@ -77,6 +77,7 @@
        *
        * @param rev the revised text
        * @return the revision script.
  +     * @throws DifferentiationFailedException if the diff could not be computed.
        */
       public abstract Revision diff(Object[] orig, Object[] rev)
           throws DifferentiationFailedException;
  
  
  
  1.2       +26 -15    jakarta-commons-sandbox/jrcs/src/java/org/apache/commons/jrcs/diff/package.html
  
  Index: package.html
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/jrcs/src/java/org/apache/commons/jrcs/diff/package.html,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- package.html	28 May 2002 16:45:45 -0000	1.1
  +++ package.html	10 May 2003 18:55:10 -0000	1.2
  @@ -56,20 +56,12 @@
    -->
   <html>
     <head>
  -    <meta name="generator" content="HTML Tidy, see www.w3.org">
  -    <title>
  -    </title>
  +    <meta name="generator" content=
  +    "HTML Tidy for Cygwin (vers 1st February 2003), see www.w3.org">
  +    <title></title>
     </head>
     <body>
       <p>
  -      JRCS is a library that knows how to manipulate the archive files
  -      produced by the RCS and CVS version control systems. JRCS is not
  -      intended to replace neither tool. JRCS was written to be able
  -      create archive analysis tools that can do things like identify
  -      hot spots in the source code, measure the contributions by each
  -      developer, o assess how bugs make it in.
  -    </p>
  -    <p>
         The {@link org.apache.maven.jrcs.diff diff} package implements
         the differencing engine that JRCS uses. The engine has the power
         of Unix diff, is simple to understand, and can be used
  @@ -86,10 +78,29 @@
         differencing using this library.
       </p>
       <p>
  -      @author <a href="mailto:juanco@suigeneris.org">Juanco Anez</a>
  -      @version $Id: Archive.java,v 1.16 2002/02/27 20:58:58 sbailliez
  -      Exp $ @see Diff @see org.apache.maven.jrcs.rcs.Archive
  +      Two implementations of the differencing algorithm are provided.
       </p>
  +    <ul>
  +      <li>
  +        {@link SimpleDiff Simple Diff} is a verys imple algorithm that
  +        is fast and works well with very large input sequences, but
  +        that frequently produces result that are subotimal (at times
  +        four or more times larger than GNU diff).
  +      </li>
  +      <li>
  +        {@link org.apache.commons.jrcs.diff.myers.MyersDiff MyersDiff}
  +        is an implementation of <a href=
  +        "http://www.cs.arizona.edu/people/gene/">Gene Myers</a>
  +        differencing algorithm. Myer's algorithm produces optimum
  +        results (minimum diffs), but consumes considerably more memory
  +        than SimpleDiff, so its not suitable for very large files.
  +      </li>
  +    </ul>
  +<pre>
  +@author <a href="mailto:juanco@suigeneris.org">Juanco Anez</a>
  +@version $Id$
  +@see Diff 
  +@see org.apache.maven.jrcs.rcs.Archive
  +</pre>
     </body>
   </html>
  -
  
  
  
  1.5       +24 -32    jakarta-commons-sandbox/jrcs/src/java/org/apache/commons/jrcs/diff/myers/MyersDiff.java
  
  Index: MyersDiff.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/jrcs/src/java/org/apache/commons/jrcs/diff/myers/MyersDiff.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- MyersDiff.java	6 May 2003 18:07:51 -0000	1.4
  +++ MyersDiff.java	10 May 2003 18:55:11 -0000	1.5
  @@ -123,14 +123,14 @@
   
           PathNode path = null;
   
  -        diagonal[middle + 1] = new PathNode(0, -1);
  +        diagonal[middle + 1] = new Snake(0, -1, null);
           for (int d = 0; d < MAX; d++)
           {
               for (int k = -d; k <= d; k += 2)
               {
  -                int kmiddle = middle + k;
  -                int kplus = kmiddle + 1;
  -                int kminus = kmiddle - 1;
  +                final int kmiddle = middle + k;
  +                final int kplus = kmiddle + 1;
  +                final int kminus = kmiddle - 1;
                   PathNode prev = null;
   
                   int i;
  @@ -146,13 +146,12 @@
                       prev = diagonal[kminus];
                   }
   
  -                if (prev.i < 0 || prev.j < 0)
  -                {
  -                    prev = null; // discard artificial nodes used for bootstrapping
  -                }
  +                diagonal[kminus] = null; // no longer used
   
                   int j = i - k;
   
  +                PathNode node = new DiffNode(i, j, prev);
  +
                   // orig and rev are zero-based
                   // but the algorithm is one-based
                   // that's why there's no +1 when indexing the sequences
  @@ -161,14 +160,18 @@
                       i++;
                       j++;
                   }
  +                if (i > node.i)
  +                    node = new Snake(i, j, node);
   
  -                diagonal[kmiddle] = new PathNode(i, j, prev);
  +                diagonal[kmiddle] = node;
   
                   if (i >= N && j >= M)
                   {
                       return diagonal[kmiddle];
                   }
               }
  +            diagonal[middle+d-1] = null;
  +
           }
           // According to Myers, this cannot happen
           throw new DifferentiationFailedException("could not find a diff path");
  @@ -181,8 +184,8 @@
        * @param orig The original sequence.
        * @param rev The revised sequence.
        * @return A {@link Revision} script corresponding to the path.
  -     * @throws DifferentiationFailedException if the {@link Revision} could
  -     *         not be built.
  +     * @throws DifferentiationFailedException if a {@link Revision} could
  +     *         not be built from the given path.
        */
       public static Revision buildRevision(PathNode path, Object[] orig, Object[] rev)
       {
  @@ -194,34 +197,23 @@
               throw new IllegalArgumentException("revised sequence is null");
   
           Revision revision = new Revision();
  -        while (path != null && path.prev != null)
  +        if (path.isSnake())
  +            path = path.prev;
  +        while (path != null && path.prev != null && path.prev.j >= 0)
           {
  +            assert(!path.isSnake());
               int i = path.i;
               int j = path.j;
  -            while (i > 0 && j > 0
  -                   && i > path.prev.i && j > path.prev.j // donnot
follow a snake past a previous node
  -                   && orig[i - 1].equals(rev[j - 1])) // check that it is indeed
a snake
  -            {
  -                // reverse snake
  -                i--;
  -                j--;
  -            }
   
  -            int ianchor;
  -            int janchor;
  -            do
  -            {
  -                path = path.prev;
  -                ianchor = path.i;
  -                janchor = path.j;
  -            }
  -            while (path.prev != null
  -                   && (ianchor == path.prev.i || janchor == path.prev.j) // while
no snake
  -                   );
  +            path = path.prev;
  +            int ianchor = path.i;
  +            int janchor = path.j;
   
               Delta delta = Delta.newDelta(new Chunk(orig, ianchor, i - ianchor),
                                            new Chunk(rev, janchor, j - janchor));
               revision.insertDelta(delta);
  +            if (path.isSnake())
  +                path = path.prev;
           }
           return revision;
       }
  
  
  
  1.6       +46 -13    jakarta-commons-sandbox/jrcs/src/java/org/apache/commons/jrcs/diff/myers/PathNode.java
  
  Index: PathNode.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/jrcs/src/java/org/apache/commons/jrcs/diff/myers/PathNode.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- PathNode.java	8 May 2003 05:35:49 -0000	1.5
  +++ PathNode.java	10 May 2003 18:55:11 -0000	1.6
  @@ -57,7 +57,17 @@
   
   package org.apache.commons.jrcs.diff.myers;
   
  -public final class PathNode
  +/**
  + * A node in a diffpath.
  + *
  + * @version $Revision$ $Date$
  + * @author <a href="mailto:juanco@suigeneris.org">Juanco Anez</a>
  + *
  + * @see DiffNode
  + * @see Snake
  + *
  + */
  +public abstract class PathNode
   {
       /** Position in the original sequence. */
       public final int i;
  @@ -67,16 +77,6 @@
       public final PathNode prev;
   
       /**
  -     * Creates a diffpath of a single node.
  -     * @param i The position in the original sequence.
  -     * @param j The position in the revised sequence.
  -     */
  -    public PathNode(int i, int j)
  -    {
  -        this(i, j, null);
  -    }
  -
  -    /**
        * Concatenates a new path node with an existing diffpath.
        * @param i The position in the original sequence for the new node.
        * @param j The position in the revised sequence for the new node.
  @@ -84,11 +84,44 @@
        */
       public PathNode(int i, int j, PathNode prev)
       {
  -        if (prev != null && (i - prev.i) == (j - prev.j))
  -            throw new IllegalArgumentException("node points to a diagonal");
           this.i = i;
           this.j = j;
           this.prev = prev;
  +    }
  +
  +    /**
  +     * Is this node a {@link Snake Snake node}?
  +     * @return true if this is a {@link Snake Snake node}
  +     */
  +    public abstract boolean isSnake();
  +
  +    /**
  +     * Is this a bootstrap node?
  +     * <p>
  +     * In bottstrap nodes one of the two corrdinates is
  +     * less than zero.
  +     * @return tru if this is a bootstrap node.
  +     */
  +    public boolean isBootstrap()
  +    {
  +        return i < 0 || j < 0;
  +    }
  +
  +    /**
  +     * Skips sequences of {@link DiffNode DiffNodes} until a
  +     * {@link Snake} or bootstrap node is found, or the end
  +     * of the path is reached.
  +     * @return The next first {@link Snake} or bootstrap node in the path, or
  +     * <code>null</code>
  +     * if none found.
  +     */
  +    public final PathNode previousSnake()
  +    {
  +        if (isBootstrap())
  +            return null;
  +        if (!isSnake() && prev != null)
  +            return prev.previousSnake();
  +        return this;
       }
   
       /**
  
  
  
  1.3       +2 -1      jakarta-commons-sandbox/jrcs/src/test/org/apache/commons/jrcs/AllTests.java
  
  Index: AllTests.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/jrcs/src/test/org/apache/commons/jrcs/AllTests.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- AllTests.java	6 May 2003 14:59:17 -0000	1.2
  +++ AllTests.java	10 May 2003 18:55:11 -0000	1.3
  @@ -67,7 +67,8 @@
   
     public static Test suite() {
       TestSuite suite = new TestSuite();
  -    suite.addTestSuite(org.apache.commons.jrcs.diff.DiffTest.class);
  +    suite.addTestSuite(org.apache.commons.jrcs.diff.SimpleDiffTests.class);
  +    suite.addTestSuite(org.apache.commons.jrcs.diff.MyersDiffTests.class);
       suite.addTestSuite(org.apache.commons.jrcs.rcs.ArchiveTest.class);
       suite.addTestSuite(org.apache.commons.jrcs.rcs.ChangeDeltaTest.class);
       suite.addTestSuite(org.apache.commons.jrcs.rcs.KeywordsFormatTest.class);
  
  
  
  1.10      +37 -20    jakarta-commons-sandbox/jrcs/src/test/org/apache/commons/jrcs/diff/DiffTest.java
  
  Index: DiffTest.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/jrcs/src/test/org/apache/commons/jrcs/diff/DiffTest.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- DiffTest.java	7 May 2003 06:34:00 -0000	1.9
  +++ DiffTest.java	10 May 2003 18:55:11 -0000	1.10
  @@ -59,14 +59,17 @@
   
   import junit.framework.*;
   
  -public class DiffTest
  +public abstract class DiffTest
       extends TestCase
   {
       static final int LARGE=4*1024;
   
  -    public DiffTest(String testName)
  +    protected DiffAlgorithm algorithm;
  +
  +    public DiffTest(String testName, DiffAlgorithm algorithm)
       {
           super(testName);
  +        this.algorithm = algorithm;
       }
   
       public static Test suite()
  @@ -124,19 +127,19 @@
       public void testDeleteAll()
           throws DifferentiationFailedException, PatchFailedException
       {
  -        Revision revision = Diff.diff(original, empty);
  -        assertEquals(revision.size(), 1);
  -        assertEquals(revision.getDelta(0).getClass(), DeleteDelta.class);
  +        Revision revision = Diff.diff(original, empty, algorithm);
  +        assertEquals(1, revision.size());
  +        assertEquals(DeleteDelta.class, revision.getDelta(0).getClass());
           assertTrue(Diff.compare(revision.patch(original), empty));
       }
   
       public void testTwoDeletes()
           throws DifferentiationFailedException, PatchFailedException
       {
  -        Revision revision = Diff.diff(original, rev1);
  -        assertEquals(revision.size(), 2);
  -        assertEquals(revision.getDelta(0).getClass(), DeleteDelta.class);
  -        assertEquals(revision.getDelta(1).getClass(), DeleteDelta.class);
  +        Revision revision = Diff.diff(original, rev1, algorithm);
  +        assertEquals(2, revision.size());
  +        assertEquals(DeleteDelta.class, revision.getDelta(0).getClass());
  +        assertEquals(DeleteDelta.class, revision.getDelta(1).getClass());
           assertTrue(Diff.compare(revision.patch(original), rev1));
           assertEquals("3d2" + Diff.NL +
                        "< [3] three" + Diff.NL +
  @@ -148,7 +151,7 @@
       public void testChangeAtTheEnd()
           throws DifferentiationFailedException, PatchFailedException
       {
  -        Revision revision = Diff.diff(original, rev2);
  +        Revision revision = Diff.diff(original, rev2, algorithm);
           assertEquals(1, revision.size());
           assertEquals(ChangeDelta.class, revision.getDelta(0).getClass());
           assertTrue(Diff.compare(revision.patch(original), rev2));
  @@ -164,7 +167,7 @@
       {
           try
           {
  -            Revision revision = Diff.diff(original, rev2);
  +            Revision revision = Diff.diff(original, rev2, algorithm);
               assertTrue(!Diff.compare(revision.patch(rev1), rev2));
               fail("PatchFailedException not thrown");
           }
  @@ -195,7 +198,7 @@
                          "[6] six",
                          "[4] four"
           };
  -        Revision revision = Diff.diff(orig, rev);
  +        Revision revision = Diff.diff(orig, rev, algorithm);
           Object[] patched = revision.patch(orig);
           assertTrue(Diff.compare(patched, rev));
       }
  @@ -223,7 +226,7 @@
                          "six revised",
                          "[5] five"
           };
  -        Revision revision = Diff.diff(orig, rev);
  +        Revision revision = Diff.diff(orig, rev, algorithm);
           Object[] patched = revision.patch(orig);
           assertTrue(Diff.compare(patched, rev));
       }
  @@ -244,7 +247,7 @@
           for (int seed = 0; seed < 10; seed++)
           {
               Object[] shuffle = Diff.shuffle(orig);
  -            Revision revision = Diff.diff(orig, shuffle);
  +            Revision revision = Diff.diff(orig, shuffle, algorithm);
               Object[] patched = revision.patch(orig);
               if (!Diff.compare(patched, shuffle))
               {
  @@ -260,7 +263,7 @@
           for (int seed = 0; seed < 10; seed++)
           {
               Object[] random = Diff.randomEdit(orig, seed);
  -            Revision revision = Diff.diff(orig, random);
  +            Revision revision = Diff.diff(orig, random, algorithm);
               Object[] patched = revision.patch(orig);
               if (!Diff.compare(patched, random))
               {
  @@ -332,7 +335,7 @@
           Visitor visitor = new Visitor();
           try
           {
  -            Diff.diff(orig, rev).accept(visitor);
  +            Diff.diff(orig, rev, algorithm).accept(visitor);
               assertEquals(visitor.toString(),
                            "visited Revision\n" +
                            "[2] two revised\n" +
  @@ -362,10 +365,10 @@
           throws DifferentiationFailedException, PatchFailedException
       {
           Object[] orig = Diff.randomSequence(LARGE);
  -        for (int seed = 0; seed < 8; seed++)
  +        for (int seed = 0; seed < 3; seed++)
           {
               Object[] rev = Diff.shuffle(orig);
  -            Revision revision = Diff.diff(orig, rev);
  +            Revision revision = Diff.diff(orig, rev, algorithm);
               Object[] patched = revision.patch(orig);
               if (!Diff.compare(patched, rev))
               {
  @@ -379,15 +382,29 @@
           throws DifferentiationFailedException, PatchFailedException
       {
           Object[] orig = Diff.randomSequence(LARGE);
  -        for (int seed = 0; seed < 8; seed++)
  +        for (int seed = 0; seed < 3; seed++)
           {
               Object[] rev = Diff.randomEdit(orig, seed);
  -            Revision revision = Diff.diff(orig, rev);
  +            Revision revision = Diff.diff(orig, rev, algorithm);
               Object[] patched = revision.patch(orig);
               if (!Diff.compare(patched, rev))
               {
                   fail("iter " + seed + " revisions differ after patch");
               }
           }
  +    }
  +
  +    public void testLargeAllEdited()
  +        throws DifferentiationFailedException, PatchFailedException
  +    {
  +        Object[] orig = Diff.randomSequence(LARGE);
  +        Object[] rev = Diff.editAll(orig);
  +        Revision revision = Diff.diff(orig, rev, algorithm);
  +        Object[] patched = revision.patch(orig);
  +        if (!Diff.compare(patched, rev))
  +        {
  +            fail("revisions differ after patch");
  +        }
  +
       }
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org


Mime
View raw message