commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ki...@apache.org
Subject [1/2] [text] Migrating Myers algorithm from [collections]
Date Thu, 05 Feb 2015 02:33:51 GMT
Repository: commons-text
Updated Branches:
  refs/heads/myers-algo [created] a7e88eef1


Migrating Myers algorithm from [collections]


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/cca1f199
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/cca1f199
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/cca1f199

Branch: refs/heads/myers-algo
Commit: cca1f19918760d4e538ff4d70d875878418f6f1a
Parents: 37c7804
Author: Bruno P. Kinoshita <kinow@apache.org>
Authored: Sun Dec 14 23:41:43 2014 -0200
Committer: Bruno P. Kinoshita <kinow@apache.org>
Committed: Sun Dec 14 23:41:43 2014 -0200

----------------------------------------------------------------------
 .../commons/text/diff/CommandVisitor.java       | 143 ++++++++++
 .../apache/commons/text/diff/DeleteCommand.java |  55 ++++
 .../apache/commons/text/diff/EditCommand.java   |  82 ++++++
 .../apache/commons/text/diff/EditScript.java    | 133 +++++++++
 .../apache/commons/text/diff/InsertCommand.java |  57 ++++
 .../apache/commons/text/diff/KeepCommand.java   |  57 ++++
 .../commons/text/diff/ReplacementsFinder.java   | 109 +++++++
 .../commons/text/diff/ReplacementsHandler.java  |  52 ++++
 .../commons/text/diff/StringsComparator.java    | 281 +++++++++++++++++++
 .../apache/commons/text/diff/package-info.java  |  22 ++
 10 files changed, 991 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
new file mode 100644
index 0000000..ae3833c
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * This interface should be implemented by user object to walk
+ * through {@link EditScript EditScript} objects.
+ * <p>
+ * Users should implement this interface in order to walk through
+ * the {@link EditScript EditScript} object created by the comparison
+ * of two sequences. This is a direct application of the visitor
+ * design pattern. The {@link EditScript#visit EditScript.visit}
+ * method takes an object implementing this interface as an argument,
+ * it will perform the loop over all commands in the script and the
+ * proper methods of the user class will be called as the commands are
+ * encountered.
+ * <p>
+ * The implementation of the user visitor class will depend on the
+ * need. Here are two examples.
+ * <p>
+ * The first example is a visitor that build the longest common
+ * subsequence:
+ * <pre>
+ * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ *
+ * import java.util.ArrayList;
+ *
+ * public class LongestCommonSubSequence implements CommandVisitor {
+ *
+ *   public LongestCommonSubSequence() {
+ *     a = new ArrayList();
+ *   }
+ *
+ *   public void visitInsertCommand(Object object) {
+ *   }
+ *
+ *   public void visitKeepCommand(Object object) {
+ *     a.add(object);
+ *   }
+ *
+ *   public void visitDeleteCommand(Object object) {
+ *   }
+ *
+ *   public Object[] getSubSequence() {
+ *     return a.toArray();
+ *   }
+ *
+ *   private ArrayList a;
+ *
+ * }
+ * </pre>
+ * <p>
+ * The second example is a visitor that shows the commands and the way
+ * they transform the first sequence into the second one:
+ * <pre>
+ * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ *
+ * import java.util.Arrays;
+ * import java.util.ArrayList;
+ * import java.util.Iterator;
+ *
+ * public class ShowVisitor implements CommandVisitor {
+ *
+ *   public ShowVisitor(Object[] sequence1) {
+ *     v = new ArrayList();
+ *     v.addAll(Arrays.asList(sequence1));
+ *     index = 0;
+ *   }
+ *
+ *   public void visitInsertCommand(Object object) {
+ *     v.insertElementAt(object, index++);
+ *     display("insert", object);
+ *   }
+ *
+ *   public void visitKeepCommand(Object object) {
+ *     ++index;
+ *     display("keep  ", object);
+ *   }
+ *
+ *   public void visitDeleteCommand(Object object) {
+ *     v.remove(index);
+ *     display("delete", object);
+ *   }
+ *
+ *   private void display(String commandName, Object object) {
+ *     System.out.println(commandName + " " + object + " ->" + this);
+ *   }
+ *
+ *   public String toString() {
+ *     StringBuffer buffer = new StringBuffer();
+ *     for (Iterator iter = v.iterator(); iter.hasNext();) {
+ *       buffer.append(' ').append(iter.next());
+ *     }
+ *     return buffer.toString();
+ *   }
+ *
+ *   private ArrayList v;
+ *   private int index;
+ *
+ * }
+ * </pre>
+ *
+ * @since 4.0
+ * @version $Id: CommandVisitor.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public interface CommandVisitor<T> {
+
+    /**
+     * Method called when an insert command is encountered.
+     *
+     * @param object object to insert (this object comes from the second sequence)
+     */
+    void visitInsertCommand(T object);
+
+    /**
+     * Method called when a keep command is encountered.
+     *
+     * @param object object to keep (this object comes from the first sequence)
+     */
+    void visitKeepCommand(T object);
+
+    /**
+     * Method called when a delete command is encountered.
+     *
+     * @param object object to delete (this object comes from the first sequence)
+     */
+    void visitDeleteCommand(T object);
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
new file mode 100644
index 0000000..0de3a43
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * Command representing the deletion of one object of the first sequence.
+ * <p>
+ * When one object of the first sequence has no corresponding object in the
+ * second sequence at the right place, the {@link EditScript edit script}
+ * transforming the first sequence into the second sequence uses an instance of
+ * this class to represent the deletion of this object. The objects embedded in
+ * these type of commands always come from the first sequence.
+ *
+ * @see SequencesComparator
+ * @see EditScript
+ *
+ * @since 4.0
+ * @version $Id: DeleteCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class DeleteCommand<T> extends EditCommand<T> {
+
+    /**
+     * Simple constructor. Creates a new instance of {@link DeleteCommand}.
+     *
+     * @param object  the object of the first sequence that should be deleted
+     */
+    public DeleteCommand(final T object) {
+        super(object);
+    }
+
+    /**
+     * Accept a visitor. When a <code>DeleteCommand</code> accepts a visitor,
it calls
+     * its {@link CommandVisitor#visitDeleteCommand visitDeleteCommand} method.
+     *
+     * @param visitor  the visitor to be accepted
+     */
+    @Override
+    public void accept(final CommandVisitor<T> visitor) {
+        visitor.visitDeleteCommand(getObject());
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/EditCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditCommand.java b/src/main/java/org/apache/commons/text/diff/EditCommand.java
new file mode 100644
index 0000000..7c5ff76
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/EditCommand.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * Abstract base class for all commands used to transform an objects sequence
+ * into another one.
+ * <p>
+ * When two objects sequences are compared through the
+ * {@link SequencesComparator#getScript SequencesComparator.getScript} method,
+ * the result is provided has a {@link EditScript script} containing the commands
+ * that progressively transform the first sequence into the second one.
+ * <p>
+ * There are only three types of commands, all of which are subclasses of this
+ * abstract class. Each command is associated with one object belonging to at
+ * least one of the sequences. These commands are {@link InsertCommand
+ * InsertCommand} which correspond to an object of the second sequence being
+ * inserted into the first sequence, {@link DeleteCommand DeleteCommand} which
+ * correspond to an object of the first sequence being removed and
+ * {@link KeepCommand KeepCommand} which correspond to an object of the first
+ * sequence which <code>equals</code> an object in the second sequence. It is
+ * guaranteed that comparison is always performed this way (i.e. the
+ * <code>equals</code> method of the object from the first sequence is used and
+ * the object passed as an argument comes from the second sequence) ; this can
+ * be important if subclassing is used for some elements in the first sequence
+ * and the <code>equals</code> method is specialized.
+ *
+ * @see SequencesComparator
+ * @see EditScript
+ *
+ * @since 4.0
+ * @version $Id: EditCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public abstract class EditCommand<T> {
+
+    /** Object on which the command should be applied. */
+    private final T object;
+
+    /**
+     * Simple constructor. Creates a new instance of EditCommand
+     *
+     * @param object  reference to the object associated with this command, this
+     *   refers to an element of one of the sequences being compared
+     */
+    protected EditCommand(final T object) {
+        this.object = object;
+    }
+
+    /**
+     * Returns the object associated with this command.
+     *
+     * @return the object on which the command is applied
+     */
+    protected T getObject() {
+        return object;
+    }
+
+    /**
+     * Accept a visitor.
+     * <p>
+     * This method is invoked for each commands belonging to
+     * an {@link EditScript EditScript}, in order to implement the visitor design pattern
+     *
+     * @param visitor  the visitor to be accepted
+     */
+    public abstract void accept(CommandVisitor<T> visitor);
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/EditScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditScript.java b/src/main/java/org/apache/commons/text/diff/EditScript.java
new file mode 100644
index 0000000..ab7b04e
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/EditScript.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class gathers all the {@link EditCommand commands} needed to transform
+ * one objects sequence into another objects sequence.
+ * <p>
+ * An edit script is the most general view of the differences between two
+ * sequences. It is built as the result of the comparison between two sequences
+ * by the {@link SequencesComparator SequencesComparator} class. The user can
+ * walk through it using the <em>visitor</em> design pattern.
+ * <p>
+ * It is guaranteed that the objects embedded in the {@link InsertCommand insert
+ * commands} come from the second sequence and that the objects embedded in
+ * either the {@link DeleteCommand delete commands} or {@link KeepCommand keep
+ * commands} come from the first sequence. This can be important if subclassing
+ * is used for some elements in the first sequence and the <code>equals</code>
+ * method is specialized.
+ *
+ * @see SequencesComparator
+ * @see EditCommand
+ * @see CommandVisitor
+ * @see ReplacementsHandler
+ *
+ * @since 4.0
+ * @version $Id: EditScript.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class EditScript<T> {
+
+    /** Container for the commands. */
+    private final List<EditCommand<T>> commands;
+
+    /** Length of the longest common subsequence. */
+    private int lcsLength;
+
+    /** Number of modifications. */
+    private int modifications;
+
+    /**
+     * Simple constructor. Creates a new empty script.
+     */
+    public EditScript() {
+        commands = new ArrayList<EditCommand<T>>();
+        lcsLength = 0;
+        modifications = 0;
+    }
+
+    /**
+     * Add a keep command to the script.
+     *
+     * @param command  command to add
+     */
+    public void append(final KeepCommand<T> command) {
+        commands.add(command);
+        ++lcsLength;
+    }
+
+    /**
+     * Add an insert command to the script.
+     *
+     * @param command  command to add
+     */
+    public void append(final InsertCommand<T> command) {
+        commands.add(command);
+        ++modifications;
+    }
+
+    /**
+     * Add a delete command to the script.
+     *
+     * @param command  command to add
+     */
+    public void append(final DeleteCommand<T> command) {
+        commands.add(command);
+        ++modifications;
+    }
+
+    /**
+     * Visit the script. The script implements the <em>visitor</em> design
+     * pattern, this method is the entry point to which the user supplies its
+     * own visitor, the script will be responsible to drive it through the
+     * commands in order and call the appropriate method as each command is
+     * encountered.
+     *
+     * @param visitor  the visitor that will visit all commands in turn
+     */
+    public void visit(final CommandVisitor<T> visitor) {
+        for (final EditCommand<T> command : commands) {
+            command.accept(visitor);
+        }
+    }
+
+    /**
+     * Get the length of the Longest Common Subsequence (LCS). The length of the
+     * longest common subsequence is the number of {@link KeepCommand keep
+     * commands} in the script.
+     *
+     * @return length of the Longest Common Subsequence
+     */
+    public int getLCSLength() {
+        return lcsLength;
+    }
+
+    /**
+     * Get the number of effective modifications. The number of effective
+     * modification is the number of {@link DeleteCommand delete} and
+     * {@link InsertCommand insert} commands in the script.
+     *
+     * @return number of effective modifications
+     */
+    public int getModifications() {
+        return modifications;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/InsertCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/InsertCommand.java b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
new file mode 100644
index 0000000..9ec9c6a
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * Command representing the insertion of one object of the second sequence.
+ * <p>
+ * When one object of the second sequence has no corresponding object in the
+ * first sequence at the right place, the {@link EditScript edit script}
+ * transforming the first sequence into the second sequence uses an instance of
+ * this class to represent the insertion of this object. The objects embedded in
+ * these type of commands always come from the second sequence.
+ *
+ * @see SequencesComparator
+ * @see EditScript
+ *
+ * @since 4.0
+ * @version $Id: InsertCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class InsertCommand<T> extends EditCommand<T> {
+
+    /**
+     * Simple constructor. Creates a new instance of InsertCommand
+     *
+     * @param object  the object of the second sequence that should be inserted
+     */
+    public InsertCommand(final T object) {
+        super(object);
+    }
+
+    /**
+     * Accept a visitor. When an <code>InsertCommand</code> accepts a visitor,
+     * it calls its {@link CommandVisitor#visitInsertCommand visitInsertCommand}
+     * method.
+     *
+     * @param visitor  the visitor to be accepted
+     */
+    @Override
+    public void accept(final CommandVisitor<T> visitor) {
+        visitor.visitInsertCommand(getObject());
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/KeepCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/KeepCommand.java b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
new file mode 100644
index 0000000..a511bfe
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * Command representing the keeping of one object present in both sequences.
+ * <p>
+ * When one object of the first sequence <code>equals</code> another objects
in
+ * the second sequence at the right place, the {@link EditScript edit script}
+ * transforming the first sequence into the second sequence uses an instance of
+ * this class to represent the keeping of this object. The objects embedded in
+ * these type of commands always come from the first sequence.
+ *
+ * @see SequencesComparator
+ * @see EditScript
+ *
+ * @since 4.0
+ * @version $Id: KeepCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class KeepCommand<T> extends EditCommand<T> {
+
+    /**
+     * Simple constructor. Creates a new instance of KeepCommand
+     *
+     * @param object  the object belonging to both sequences (the object is a
+     *   reference to the instance in the first sequence which is known
+     *   to be equal to an instance in the second sequence)
+     */
+    public KeepCommand(final T object) {
+        super(object);
+    }
+
+    /**
+     * Accept a visitor. When a <code>KeepCommand</code> accepts a visitor, it
+     * calls its {@link CommandVisitor#visitKeepCommand visitKeepCommand} method.
+     *
+     * @param visitor  the visitor to be accepted
+     */
+    @Override
+    public void accept(final CommandVisitor<T> visitor) {
+        visitor.visitKeepCommand(getObject());
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
new file mode 100644
index 0000000..361a26d
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class handles sequences of replacements resulting from a comparison.
+ * <p>
+ * The comparison of two objects sequences leads to the identification of common
+ * parts and parts which only belong to the first or to the second sequence. The
+ * common parts appear in the edit script in the form of <em>keep</em> commands,
+ * they can be considered as synchronization objects between the two sequences.
+ * These synchronization objects split the two sequences in synchronized
+ * sub-sequences. The first sequence can be transformed into the second one by
+ * replacing each synchronized sub-sequence of the first sequence by the
+ * corresponding sub-sequence of the second sequence. This is a synthetic way to
+ * see an {@link EditScript edit script}, replacing individual
+ * {@link DeleteCommand delete}, {@link KeepCommand keep} and
+ * {@link InsertCommand insert} commands by fewer replacements acting on
+ * complete sub-sequences.
+ * <p>
+ * This class is devoted to perform this interpretation. It visits an
+ * {@link EditScript edit script} (because it implements the
+ * {@link CommandVisitor CommandVisitor} interface) and calls a user-supplied
+ * handler implementing the {@link ReplacementsHandler ReplacementsHandler}
+ * interface to process the sub-sequences.
+ *
+ * @see ReplacementsHandler
+ * @see EditScript
+ * @see SequencesComparator
+ *
+ * @since 4.0
+ * @version $Id: ReplacementsFinder.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class ReplacementsFinder<T> implements CommandVisitor<T> {
+
+    private final List<T> pendingInsertions;
+    private final List<T> pendingDeletions;
+    private int     skipped;
+
+    /** Handler to call when synchronized sequences are found. */
+    private final ReplacementsHandler<T> handler;
+
+    /**
+     * Simple constructor. Creates a new instance of {@link ReplacementsFinder}.
+     *
+     * @param handler  handler to call when synchronized sequences are found
+     */
+    public ReplacementsFinder(final ReplacementsHandler<T> handler) {
+        pendingInsertions = new ArrayList<T>();
+        pendingDeletions  = new ArrayList<T>();
+        skipped           = 0;
+        this.handler      = handler;
+    }
+
+    /**
+     * Add an object to the pending insertions set.
+     *
+     * @param object  object to insert
+     */
+    public void visitInsertCommand(final T object) {
+        pendingInsertions.add(object);
+    }
+
+    /**
+     * Handle a synchronization object.
+     * <p>
+     * When a synchronization object is identified, the pending insertions and
+     * pending deletions sets are provided to the user handler as subsequences.
+     *
+     * @param object  synchronization object detected
+     */
+    public void visitKeepCommand(final T object) {
+        if (pendingDeletions.isEmpty() && pendingInsertions.isEmpty()) {
+            ++skipped;
+        } else {
+            handler.handleReplacement(skipped, pendingDeletions, pendingInsertions);
+            pendingDeletions.clear();
+            pendingInsertions.clear();
+            skipped = 1;
+        }
+    }
+
+    /**
+     * Add an object to the pending deletions set.
+     *
+     * @param object  object to delete
+     */
+    public void visitDeleteCommand(final T object) {
+        pendingDeletions.add(object);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
new file mode 100644
index 0000000..fe99a03
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+import java.util.List;
+
+/**
+ * This interface is devoted to handle synchronized replacement sequences.
+ *
+ * @see ReplacementsFinder
+ * @since 4.0
+ * @version $Id: ReplacementsHandler.java 1543277 2013-11-19 00:53:50Z ggregory $
+ */
+public interface ReplacementsHandler<T> {
+
+    /**
+     * Handle two synchronized sequences.
+     * <p>
+     * This method is called by a {@link ReplacementsFinder ReplacementsFinder}
+     * instance when it has synchronized two sub-sequences of object arrays
+     * being compared, and at least one of the sequences is non-empty. Since the
+     * sequences are synchronized, the objects before the two sub-sequences are
+     * equals (if they exist). This property also holds for the objects after
+     * the two sub-sequences.
+     * <p>
+     * The replacement is defined as replacing the <code>from</code>
+     * sub-sequence into the <code>to</code> sub-sequence.
+     *
+     * @param skipped  number of tokens skipped since the last call (i.e. number of
+     *   tokens that were in both sequences), this number should be strictly positive
+     *   except on the very first call where it can be zero (if the first object of
+     *   the two sequences are different)
+     * @param from  sub-sequence of objects coming from the first sequence
+     * @param to  sub-sequence of objects coming from the second sequence
+     */
+    void handleReplacement(int skipped, List<T> from, List<T> to);
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/StringsComparator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/StringsComparator.java b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
new file mode 100644
index 0000000..c59f89f
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
@@ -0,0 +1,281 @@
+package org.apache.commons.text.diff;
+
+
+
+public class StringsComparator {
+    
+    private final String left;
+    private final String right;
+    
+    /** Temporary variables. */
+    private final int[] vDown;
+    private final int[] vUp;
+    
+    public StringsComparator(String left, String right) {
+        this.left = left;
+        this.right = right;
+        
+        final int size = left.length() + right.length() + 2;
+        vDown = new int[size];
+        vUp   = new int[size];
+    }
+    
+    /**
+     * Get the {@link EditScript} object.
+     * <p>
+     * It is guaranteed that the objects embedded in the {@link InsertCommand
+     * insert commands} come from the second sequence and that the objects
+     * embedded in either the {@link DeleteCommand delete commands} or
+     * {@link KeepCommand keep commands} come from the first sequence. This can
+     * be important if subclassing is used for some elements in the first
+     * sequence and the <code>equals</code> method is specialized.
+     *
+     * @return the edit script resulting from the comparison of the two
+     *         sequences
+     */
+    public EditScript getScript() {
+        final EditScript script = new EditScript();
+        buildScript(0, left.length(), 0, right.length(), script);
+        return script;
+    }
+
+    /**
+     * Build an edit script.
+     *
+     * @param start1  the begin of the first sequence to be compared
+     * @param end1  the end of the first sequence to be compared
+     * @param start2  the begin of the second sequence to be compared
+     * @param end2  the end of the second sequence to be compared
+     * @param script the edited script
+     */
+    private void buildScript(final int start1, final int end1, final int start2, final int
end2,
+            final EditScript script) {
+        final Snake middle = getMiddleSnake(start1, end1, start2, end2);
+
+        if (middle == null
+                || middle.getStart() == end1 && middle.getDiag() == end1 - end2
+                || middle.getEnd() == start1 && middle.getDiag() == start1 - start2)
{
+
+            int i = start1;
+            int j = start2;
+            while (i < end1 || j < end2) {
+                if (i < end1 && j < end2 && left.charAt(i) == right.charAt(j))
{
+                    script.append(new KeepCommand(left.charAt(i)));
+                    ++i;
+                    ++j;
+                } else {
+                    if (end1 - start1 > end2 - start2) {
+                        script.append(new DeleteCommand(left.charAt(i)));
+                        ++i;
+                    } else {
+                        script.append(new InsertCommand(right.charAt(j)));
+                        ++j;
+                    }
+                }
+            }
+
+        } else {
+
+            buildScript(start1, middle.getStart(),
+                        start2, middle.getStart() - middle.getDiag(),
+                        script);
+            for (int i = middle.getStart(); i < middle.getEnd(); ++i) {
+                script.append(new KeepCommand(left.charAt(i)));
+            }
+            buildScript(middle.getEnd(), end1,
+                        middle.getEnd() - middle.getDiag(), end2,
+                        script);
+        }
+    }
+    
+    private Snake getMiddleSnake(int start1, int end1, int start2, int end2) {
+     // Myers Algorithm
+        // Initialisations
+        final int m = end1 - start1;
+        final int n = end2 - start2;
+        if (m == 0 || n == 0) {
+            return null;
+        }
+
+        final int delta  = m - n;
+        final int sum    = n + m;
+        final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2;
+        vDown[1+offset] = start1;
+        vUp[1+offset]   = end1 + 1;
+
+        for (int d = 0; d <= offset ; ++d) {
+            // Down
+            for (int k = -d; k <= d; k += 2) {
+                // First step
+
+                final int i = k + offset;
+                if (k == -d || k != d && vDown[i-1] < vDown[i+1]) {
+                    vDown[i] = vDown[i+1];
+                } else {
+                    vDown[i] = vDown[i-1] + 1;
+                }
+
+                int x = vDown[i];
+                int y = x - start1 + start2 - k;
+
+                while (x < end1 && y < end2 && left.charAt(x) == right.charAt(y))
{
+                    vDown[i] = ++x;
+                    ++y;
+                }
+                // Second step
+                if (delta % 2 != 0 && delta - d <= k && k <= delta
+ d) {
+                    if (vUp[i-delta] <= vDown[i]) {
+                        return buildSnake(vUp[i-delta], k + start1 - start2, end1, end2);
+                    }
+                }
+            }
+
+            // Up
+            for (int k = delta - d; k <= delta + d; k += 2) {
+                // First step
+                final int i = k + offset - delta;
+                if (k == delta - d
+                        || k != delta + d && vUp[i+1] <= vUp[i-1]) {
+                    vUp[i] = vUp[i+1] - 1;
+                } else {
+                    vUp[i] = vUp[i-1];
+                }
+
+                int x = vUp[i] - 1;
+                int y = x - start1 + start2 - k;
+                while (x >= start1 && y >= start2
+                        && left.charAt(x) == right.charAt(y)) {
+                    vUp[i] = x--;
+                    y--;
+                }
+                // Second step
+                if (delta % 2 == 0 && -d <= k && k <= d ) {
+                    if (vUp[i] <= vDown[i + delta]) {
+                        return buildSnake(vUp[i], k + start1 - start2, end1, end2);
+                    }
+                }
+            }
+        }
+
+        // this should not happen
+        throw new RuntimeException("Internal Error");
+    }
+    
+    /**
+     * Build a snake.
+     *
+     * @param start  the value of the start of the snake
+     * @param diag  the value of the diagonal of the snake
+     * @param end1  the value of the end of the first sequence to be compared
+     * @param end2  the value of the end of the second sequence to be compared
+     * @return the snake built
+     */
+    private Snake buildSnake(final int start, final int diag, final int end1, final int end2)
{
+        int end = start;
+        while (end - diag < end2
+                && end < end1
+                && left.charAt(end) == right.charAt(end - diag)) {
+            ++end;
+        }
+        return new Snake(start, end, diag);
+    }
+
+    /**
+     * This class is a simple placeholder to hold the end part of a path
+     * under construction in a {@link StringsComparator StringsComparator}.
+     */
+    private static class Snake {
+
+        /** Start index. */
+        private final int start;
+
+        /** End index. */
+        private final int end;
+
+        /** Diagonal number. */
+        private final int diag;
+
+        /**
+         * Simple constructor. Creates a new instance of Snake with specified indices.
+         *
+         * @param start  start index of the snake
+         * @param end  end index of the snake
+         * @param diag  diagonal number
+         */
+        public Snake(final int start, final int end, final int diag) {
+            this.start = start;
+            this.end   = end;
+            this.diag  = diag;
+        }
+
+        /**
+         * Get the start index of the snake.
+         *
+         * @return start index of the snake
+         */
+        public int getStart() {
+            return start;
+        }
+
+        /**
+         * Get the end index of the snake.
+         *
+         * @return end index of the snake
+         */
+        public int getEnd() {
+            return end;
+        }
+
+        /**
+         * Get the diagonal number of the snake.
+         *
+         * @return diagonal number of the snake
+         */
+        public int getDiag() {
+            return diag;
+        }
+    }
+    
+    public static void main(String[] args) {
+        StringsComparator sc = new StringsComparator("O Bruno eh um bom rapaz. Ele eh do
Brasil.", "O Bruno foi um bom rapaz. Ele eh do Brasil .");
+        EditScript es = sc.getScript();
+        es.visit(new CommandVisitor() {
+            
+            boolean nlAdd = true;
+            boolean nlRemove = true;
+
+            @Override
+            public void visitInsertCommand(Object object) {
+                if (nlAdd) {
+                    System.out.println();
+                    System.out.print("> ");
+                    nlAdd = false;
+                }
+                System.out.print(object);
+            }
+
+            @Override
+            public void visitKeepCommand(Object object) {
+                if (!nlAdd) {
+                    nlAdd = true;
+                }
+                if (!nlRemove) {
+                    nlRemove = true;
+                    System.out.println();
+                }
+                System.out.print(object);
+            }
+
+            @Override
+            public void visitDeleteCommand(Object object) {
+                if (nlRemove) {
+                    System.out.println();
+                    System.out.print("< ");
+                    nlRemove = false;
+                }
+                System.out.print(object);
+            }
+        });
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/package-info.java b/src/main/java/org/apache/commons/text/diff/package-info.java
new file mode 100644
index 0000000..7a565d4
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * <p>Provides algorithms for diff between strings.</p>
+ *
+ * @since 1.0
+ */
+package org.apache.commons.text.diff;


Mime
View raw message