mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r891983 [4/47] - in /lucene/mahout/trunk: ./ core/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ core/src/main/java/org/apache/mahout/clustering/ core/src/main/java/org/apache/mahout/clustering/canopy/ core/src/main/java/org/ap...
Date Thu, 17 Dec 2009 23:22:41 GMT
Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonMatrixAdapter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonMatrixAdapter.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonMatrixAdapter.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonMatrixAdapter.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.math;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.JsonDeserializationContext;
+import com.google.gson.JsonDeserializer;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParseException;
+import com.google.gson.JsonPrimitive;
+import com.google.gson.JsonSerializationContext;
+import com.google.gson.JsonSerializer;
+import com.google.gson.reflect.TypeToken;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.reflect.Type;
+
+public class JsonMatrixAdapter implements JsonSerializer<Matrix>,
+    JsonDeserializer<Matrix> {
+
+  private static final Logger log = LoggerFactory.getLogger(JsonMatrixAdapter.class);
+  public static final String CLASS = "class";
+  public static final String MATRIX = "matrix";
+
+  @Override
+  public JsonElement serialize(Matrix src, Type typeOfSrc,
+                               JsonSerializationContext context) {
+    Type vectorType = new TypeToken<Vector>() {
+    }.getType();
+    Type matrixType = new TypeToken<Matrix>() {
+    }.getType();
+    GsonBuilder builder = new GsonBuilder();
+    builder.registerTypeAdapter(vectorType, new JsonVectorAdapter());
+    builder.registerTypeAdapter(matrixType, new JsonMatrixAdapter());
+    Gson gson = builder.create();
+    JsonObject obj = new JsonObject();
+    obj.add(CLASS, new JsonPrimitive(src.getClass().getName()));
+    obj.add(MATRIX, new JsonPrimitive(gson.toJson(src)));
+    return obj;
+  }
+
+  @Override
+  public Matrix deserialize(JsonElement json, Type typeOfT,
+                            JsonDeserializationContext context) throws JsonParseException {
+    Type vectorType = new TypeToken<Vector>() {
+    }.getType();
+    Type matrixType = new TypeToken<Matrix>() {
+    }.getType();
+    GsonBuilder builder = new GsonBuilder();
+    builder.registerTypeAdapter(vectorType, new JsonVectorAdapter());
+    builder.registerTypeAdapter(matrixType, new JsonMatrixAdapter());
+    Gson gson = builder.create();
+    JsonObject obj = json.getAsJsonObject();
+    String klass = obj.get(CLASS).getAsString();
+    String matrix = obj.get(MATRIX).getAsString();
+    ClassLoader ccl = Thread.currentThread().getContextClassLoader();
+    Class<?> cl = null;
+    try {
+      cl = ccl.loadClass(klass);
+    } catch (ClassNotFoundException e) {
+      log.warn("Error while loading class", e);
+    }
+    return (Matrix) gson.fromJson(matrix, cl);
+  }
+
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonMatrixAdapter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonVectorAdapter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonVectorAdapter.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonVectorAdapter.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonVectorAdapter.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.math;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.JsonDeserializationContext;
+import com.google.gson.JsonDeserializer;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParseException;
+import com.google.gson.JsonPrimitive;
+import com.google.gson.JsonSerializationContext;
+import com.google.gson.JsonSerializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.reflect.Type;
+
+public class JsonVectorAdapter implements JsonSerializer<Vector>,
+    JsonDeserializer<Vector> {
+
+  private static final Logger log = LoggerFactory.getLogger(JsonVectorAdapter.class);
+  public static final String VECTOR = "vector";
+
+  @Override
+  public JsonElement serialize(Vector src, Type typeOfSrc,
+                               JsonSerializationContext context) {
+    GsonBuilder builder = new GsonBuilder();
+    builder.registerTypeAdapter(Vector.class, new JsonVectorAdapter());
+    Gson gson = builder.create();
+    JsonObject obj = new JsonObject();
+    obj.add(JsonMatrixAdapter.CLASS, new JsonPrimitive(src.getClass().getName()));
+    obj.add(VECTOR, new JsonPrimitive(gson.toJson(src)));
+    return obj;
+  }
+
+  @Override
+  public Vector deserialize(JsonElement json, Type typeOfT,
+                            JsonDeserializationContext context) throws JsonParseException {
+    GsonBuilder builder = new GsonBuilder();
+    builder.registerTypeAdapter(Vector.class, new JsonVectorAdapter());
+    Gson gson = builder.create();
+    JsonObject obj = json.getAsJsonObject();
+    String klass = obj.get(JsonMatrixAdapter.CLASS).getAsString();
+    String vector = obj.get(VECTOR).getAsString();
+    ClassLoader ccl = Thread.currentThread().getContextClassLoader();
+    Class<?> cl = null;
+    try {
+      cl = ccl.loadClass(klass);
+    } catch (ClassNotFoundException e) {
+      log.warn("Error while loading class", e);
+    }
+    return (Vector) gson.fromJson(vector, cl);
+  }
+
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonVectorAdapter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/Matrix.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/Matrix.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/Matrix.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/Matrix.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,395 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import org.apache.hadoop.io.Writable;
+
+import java.util.Map;
+
+/** The basic interface including numerous convenience functions */
+public interface Matrix extends Cloneable, Writable {
+
+  /** @return a formatted String suitable for output */
+  String asFormatString();
+
+  /**
+   * Assign the value to all elements of the receiver
+   *
+   * @param value a double value
+   * @return the modified receiver
+   */
+  Matrix assign(double value);
+
+  /**
+   * Assign the values to the receiver
+   *
+   * @param values a double[] of values
+   * @return the modified receiver
+   * @throws CardinalityException if the cardinalities differ
+   */
+  Matrix assign(double[][] values);
+
+  /**
+   * Assign the other vector values to the receiver
+   *
+   * @param other a Matrix
+   * @return the modified receiver
+   * @throws CardinalityException if the cardinalities differ
+   */
+  Matrix assign(Matrix other);
+
+  /**
+   * Apply the function to each element of the receiver
+   *
+   * @param function a UnaryFunction to apply
+   * @return the modified receiver
+   */
+  Matrix assign(UnaryFunction function);
+
+  /**
+   * Apply the function to each element of the receiver and the corresponding element of the other argument
+   *
+   * @param other    a Matrix containing the second arguments to the function
+   * @param function a BinaryFunction to apply
+   * @return the modified receiver
+   * @throws CardinalityException if the cardinalities differ
+   */
+  Matrix assign(Matrix other, BinaryFunction function);
+
+  /**
+   * Assign the other vector values to the column of the receiver
+   *
+   * @param column the int row to assign
+   * @param other  a Vector
+   * @return the modified receiver
+   * @throws CardinalityException if the cardinalities differ
+   */
+  Matrix assignColumn(int column, Vector other);
+
+  /**
+   * Assign the other vector values to the row of the receiver
+   *
+   * @param row   the int row to assign
+   * @param other a Vector
+   * @return the modified receiver
+   * @throws CardinalityException if the cardinalities differ
+   */
+  Matrix assignRow(int row, Vector other);
+
+  /**
+   * Return the cardinality of the recipient (the maximum number of values)
+   *
+   * @return an int[2]
+   */
+  int[] size();
+
+  /** Helper method to return the cardinality of the row dimension */
+  int numRows();
+
+  /** Helper method to return the cardinality of the column dimension */
+  int numCols();
+
+  /**
+   * Return a copy of the recipient
+   *
+   * @return a new Matrix
+   */
+  Matrix clone();
+
+  /**
+   * Returns matrix determinator using Laplace theorem
+   *
+   * @return a matrix determinator
+   */
+  double determinant();
+
+  /**
+   * Return a new matrix containing the values of the recipient divided by the argument
+   *
+   * @param x a double value
+   * @return a new Matrix
+   */
+  Matrix divide(double x);
+
+  /**
+   * Return the value at the given indexes
+   *
+   * @param row    an int row index
+   * @param column an int column index
+   * @return the double at the index
+   * @throws IndexException if the index is out of bounds
+   */
+  double get(int row, int column);
+
+  /**
+   * Return the column at the given index
+   *
+   * @param column an int column index
+   * @return a Vector at the index
+   * @throws IndexException if the index is out of bounds
+   */
+  Vector getColumn(int column);
+
+  /**
+   * Return the row at the given index
+   *
+   * @param row an int row index
+   * @return a Vector at the index
+   * @throws IndexException if the index is out of bounds
+   */
+  Vector getRow(int row);
+
+  /**
+   * Return the value at the given indexes, without checking bounds
+   *
+   * @param row    an int row index
+   * @param column an int column index
+   * @return the double at the index
+   */
+  double getQuick(int row, int column);
+
+  /**
+   * Return if the other matrix and the receiver share any underlying data cells
+   *
+   * @param other a Matrix
+   * @return true if the other matrix has common data cells
+   */
+  boolean haveSharedCells(Matrix other);
+
+  /**
+   * Return an empty matrix of the same underlying class as the receiver
+   *
+   * @return a Matrix
+   */
+  Matrix like();
+
+  /**
+   * Return an empty matrix of the same underlying class as the receiver and of the given cardinality
+   *
+   * @param rows    the int number of rows
+   * @param columns the int number of columns
+   */
+  Matrix like(int rows, int columns);
+
+  /**
+   * Return a new matrix containing the element by element difference of the recipient and the argument
+   *
+   * @param x a Matrix
+   * @return a new Matrix
+   * @throws CardinalityException if the cardinalities differ
+   */
+  Matrix minus(Matrix x);
+
+  /**
+   * Return a new matrix containing the sum of each value of the recipient and the argument
+   *
+   * @param x a double
+   * @return a new Matrix
+   */
+  Matrix plus(double x);
+
+  /**
+   * Return a new matrix containing the element by element sum of the recipient and the argument
+   *
+   * @param x a Matrix
+   * @return a new Matrix
+   * @throws CardinalityException if the cardinalities differ
+   */
+  Matrix plus(Matrix x);
+
+  /**
+   * Set the value at the given index
+   *
+   * @param row    an int row index into the receiver
+   * @param column an int column index into the receiver
+   * @param value  a double value to set
+   * @throws IndexException if the index is out of bounds
+   */
+  void set(int row, int column, double value);
+
+  void set(int row, double[] data);
+
+  /**
+   * Set the value at the given index, without checking bounds
+   *
+   * @param row    an int row index into the receiver
+   * @param column an int column index into the receiver
+   * @param value  a double value to set
+   */
+  void setQuick(int row, int column, double value);
+
+  /**
+   * Return the number of values in the recipient
+   *
+   * @return an int[2] containing [row, column] count
+   */
+  int[] getNumNondefaultElements();
+
+  /**
+   * Return a new matrix containing the product of each value of the recipient and the argument
+   *
+   * @param x a double argument
+   * @return a new Matrix
+   */
+  Matrix times(double x);
+
+  /**
+   * Return a new matrix containing the product of the recipient and the argument
+   *
+   * @param x a Matrix argument
+   * @return a new Matrix
+   * @throws CardinalityException if the cardinalities are incompatible
+   */
+  Matrix times(Matrix x);
+
+  /**
+   * Return a new vector with cardinality equal to getNumRows() of this matrix which is the matrix product of the
+   * recipient and the argument
+   *
+   * @param v a vector with cardinality equal to getNumCols() of the recipient
+   * @return a new vector (typically a DenseVector)
+   * @throws CardinalityException if this.getNumRows() != v.size()
+   */
+  Vector times(Vector v);
+
+  /**
+   * Convenience method for producing this.transpose().times(this.times(v)), which can be implemented with only one pass
+   * over the matrix, without making the transpose() call (which can be expensive if the matrix is sparse)
+   *
+   * @param v a vector with cardinality equal to getNumCols() of the recipient
+   * @return a new vector (typically a DenseVector) with cardinality equal to that of the argument.
+   * @throws CardinalityException if this.getNumCols() != v.size()
+   */
+  Vector timesSquared(Vector v);
+
+  /**
+   * Return a new matrix that is the transpose of the receiver
+   *
+   * @return the transpose
+   */
+  Matrix transpose();
+
+  /**
+   * Return a new matrix containing the subset of the recipient
+   *
+   * @param offset an int[2] offset into the receiver
+   * @param size   the int[2] size of the desired result
+   * @return a new Matrix
+   * @throws CardinalityException if the length is greater than the cardinality of the receiver
+   * @throws IndexException       if the offset is negative or the offset+length is outside of the receiver
+   */
+  Matrix viewPart(int[] offset, int[] size);
+
+  /**
+   * Return the sum of all the elements of the receiver
+   *
+   * @return a double
+   */
+  double zSum();
+
+  /**
+   * Return a map of the current column label bindings of the receiver
+   *
+   * @return a Map<String, Integer>
+   */
+  Map<String, Integer> getColumnLabelBindings();
+
+  /**
+   * Return a map of the current row label bindings of the receiver
+   *
+   * @return a Map<String, Integer>
+   */
+  Map<String, Integer> getRowLabelBindings();
+
+  /**
+   * Sets a map of column label bindings in the receiver
+   *
+   * @param bindings a Map<String, Integer> of label bindings
+   */
+  void setColumnLabelBindings(Map<String, Integer> bindings);
+
+  /**
+   * Sets a map of row label bindings in the receiver
+   *
+   * @param bindings a Map<String, Integer> of label bindings
+   */
+  void setRowLabelBindings(Map<String, Integer> bindings);
+
+  /**
+   * Return the value at the given labels
+   *
+   * @param rowLabel    a String row label
+   * @param columnLabel a String column label
+   * @return the double at the index
+   * @throws IndexException if the index is out of bounds
+   */
+  double get(String rowLabel, String columnLabel) throws IndexException,
+      UnboundLabelException;
+
+  /**
+   * Set the value at the given index
+   *
+   * @param rowLabel    a String row label
+   * @param columnLabel a String column label
+   * @param value       a double value to set
+   * @throws IndexException if the index is out of bounds
+   */
+  void set(String rowLabel, String columnLabel, double value) throws IndexException,
+      UnboundLabelException;
+
+  /**
+   * Set the value at the given index, updating the row and column label bindings
+   *
+   * @param rowLabel    a String row label
+   * @param columnLabel a String column label
+   * @param row         an int row index
+   * @param column      an int column index
+   * @param value       a double value
+   */
+  void set(String rowLabel, String columnLabel, int row, int column, double value) throws IndexException,
+      UnboundLabelException;
+
+  /**
+   * Sets the row values at the given row label
+   *
+   * @param rowLabel a String row label
+   * @param rowData  a double[] array of row data
+   */
+  void set(String rowLabel, double[] rowData);
+
+  /**
+   * Sets the row values at the given row index and updates the row labels
+   *
+   * @param rowLabel the String row label
+   * @param row      an int the row index
+   * @param rowData  a double[] array of row data
+   */
+  void set(String rowLabel, int row, double[] rowData);
+
+  /*
+   * Need stories for these but keeping them here for now.
+   * 
+   */
+  // void getNonZeros(IntArrayList jx, DoubleArrayList values);
+  // void foreachNonZero(IntDoubleFunction f);
+  // double aggregate(DoubleDoubleFunction aggregator, DoubleFunction map);
+  // double aggregate(Matrix other, DoubleDoubleFunction aggregator,
+  // DoubleDoubleFunction map);
+  // NewMatrix assign(Matrix y, DoubleDoubleFunction function, IntArrayList
+  // nonZeroIndexes);
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/Matrix.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/MatrixView.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/MatrixView.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/MatrixView.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/MatrixView.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,175 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+/** Implements subset view of a Matrix */
+public class MatrixView extends AbstractMatrix {
+
+  private Matrix matrix;
+
+  // the offset into the Matrix
+  private int[] offset;
+
+  // the cardinality of the view
+  private int[] cardinality;
+
+  public MatrixView() {
+    super();
+  }
+
+  /**
+   * Construct a view of the matrix with given offset and cardinality
+   *
+   * @param matrix      an underlying Matrix
+   * @param offset      the int[2] offset into the underlying matrix
+   * @param cardinality the int[2] cardinality of the view
+   */
+  public MatrixView(Matrix matrix, int[] offset, int[] cardinality) {
+    this.matrix = matrix;
+    this.offset = offset;
+    this.cardinality = cardinality;
+  }
+
+  @Override
+  public int[] size() {
+    return cardinality;
+  }
+
+  @Override
+  public Matrix clone() {
+    MatrixView clone = (MatrixView) super.clone();
+    clone.matrix = matrix.clone();
+    clone.offset = offset.clone();
+    clone.cardinality = cardinality.clone();
+    return clone;
+  }
+
+  @Override
+  public double getQuick(int row, int column) {
+    return matrix.getQuick(offset[ROW] + row, offset[COL] + column);
+  }
+
+  @Override
+  public Matrix like() {
+    return matrix.like(cardinality[ROW], cardinality[COL]);
+  }
+
+  @Override
+  public Matrix like(int rows, int columns) {
+
+    return matrix.like(rows, columns);
+  }
+
+  @Override
+  public void setQuick(int row, int column, double value) {
+    matrix.setQuick(offset[ROW] + row, offset[COL] + column, value);
+  }
+
+  @Override
+  public int[] getNumNondefaultElements() {
+    return cardinality;
+  }
+
+  @Override
+  public Matrix viewPart(int[] offset, int[] size) {
+    if (size[ROW] > cardinality[ROW] || size[COL] > cardinality[COL]) {
+      throw new CardinalityException();
+    }
+    if ((offset[ROW] < ROW || offset[ROW] + size[ROW] > cardinality[ROW])
+        || (offset[COL] < ROW || offset[COL] + size[COL] > cardinality[COL])) {
+      throw new IndexException();
+    }
+    int[] origin = offset.clone();
+    origin[ROW] += offset[ROW];
+    origin[COL] += offset[COL];
+    return new MatrixView(matrix, origin, size);
+  }
+
+  @Override
+  public boolean haveSharedCells(Matrix other) {
+    if (other instanceof MatrixView) {
+      return other == this || matrix.haveSharedCells(other);
+    } else {
+      return other.haveSharedCells(matrix);
+    }
+  }
+
+  @Override
+  public Matrix assignColumn(int column, Vector other) {
+    if (cardinality[ROW] != other.size()) {
+      throw new CardinalityException();
+    }
+    for (int row = 0; row < cardinality[ROW]; row++) {
+      matrix.setQuick(row + offset[ROW], column + offset[COL], other
+          .getQuick(row));
+    }
+    return this;
+  }
+
+  @Override
+  public Matrix assignRow(int row, Vector other) {
+    if (cardinality[COL] != other.size()) {
+      throw new CardinalityException();
+    }
+    for (int col = 0; col < cardinality[COL]; col++) {
+      matrix
+          .setQuick(row + offset[ROW], col + offset[COL], other.getQuick(col));
+    }
+    return this;
+  }
+
+  @Override
+  public Vector getColumn(int column) {
+    if (column < 0 || column >= cardinality[COL]) {
+      throw new IndexException();
+    }
+    return new VectorView(matrix.getColumn(column + offset[COL]), offset[ROW],
+        cardinality[ROW]);
+  }
+
+  @Override
+  public Vector getRow(int row) {
+    if (row < 0 || row >= cardinality[ROW]) {
+      throw new IndexException();
+    }
+    return new VectorView(matrix.getRow(row + offset[ROW]), offset[COL],
+        cardinality[COL]);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    super.readFields(in);
+    this.offset = new int[]{in.readInt(), in.readInt()};
+    this.cardinality = new int[]{in.readInt(), in.readInt()};
+    this.matrix = readMatrix(in);
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    super.write(out);
+    out.writeInt(offset[ROW]);
+    out.writeInt(offset[COL]);
+    out.writeInt(cardinality[ROW]);
+    out.writeInt(cardinality[COL]);
+    writeMatrix(out, this.matrix);
+  }
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/MatrixView.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/NegateFunction.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/NegateFunction.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/NegateFunction.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/NegateFunction.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+public class NegateFunction implements UnaryFunction {
+
+  @Override
+  public double apply(double arg1) {
+    return -arg1;
+  }
+
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/NegateFunction.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import java.io.Serializable;
+
+final class OrderedIntDoubleMapping implements Serializable, Cloneable {
+
+  private static final double DEFAULT_VALUE = 0.0;
+
+  private int[] indices;
+  private double[] values;
+  private int numMappings;
+
+  OrderedIntDoubleMapping() {
+    // no-arg constructor for deserializer
+    this(11);
+  }
+
+  OrderedIntDoubleMapping(int capacity) {
+    indices = new int[capacity];
+    values = new double[capacity];
+    numMappings = 0;
+  }
+
+  private OrderedIntDoubleMapping(int[] indices, double[] values, int numMappings) {
+    this.indices = indices;
+    this.values = values;
+    this.numMappings = numMappings;
+  }
+
+  int[] getIndices() {
+    return indices;
+  }
+
+  double[] getValues() {
+    return values;
+  }
+
+  int getNumMappings() {
+    return numMappings;
+  }
+
+  private void growTo(int newCapacity) {
+    if (newCapacity > indices.length) {
+      int[] newIndices = new int[newCapacity];
+      System.arraycopy(indices, 0, newIndices, 0, numMappings);
+      indices = newIndices;
+      double[] newValues = new double[newCapacity];
+      System.arraycopy(values, 0, newValues, 0, numMappings);
+      values = newValues;
+    }
+  }
+
+  private int find(int index) {
+    int low = 0;
+    int high = numMappings - 1;
+    while (low <= high) {
+      int mid = low + ((high - low) >>> 1);
+      int midVal = indices[mid];
+      if (midVal < index) {
+        low = mid + 1;
+      } else if (midVal > index) {
+        high = mid - 1;
+      } else {
+        return mid;
+      }
+    }
+    return -(low + 1);
+  }
+
+  public double get(int index) {
+    int offset = find(index);
+    return offset >= 0 ? values[offset] : DEFAULT_VALUE;
+  }
+
+  public void set(int index, double value) {
+    int offset = find(index);
+    if (offset >= 0) {
+      if (value == DEFAULT_VALUE) {
+        for (int i = offset + 1, j = offset; i < numMappings; i++, j++) {
+          indices[j] = indices[i];
+          values[j] = values[i];
+        }
+        numMappings--;
+      } else {
+        values[offset] = value;
+      }
+    } else {
+      if (value != DEFAULT_VALUE) {
+        if (numMappings >= indices.length) {
+          growTo(Math.max((int) (1.2 * numMappings), numMappings + 1));
+        }
+        int at = -offset - 1;
+        if (numMappings > at) {
+          for (int i = numMappings - 1, j = numMappings; i >= at; i--, j--) {
+            indices[j] = indices[i];
+            values[j] = values[i];
+          }
+        }
+        indices[at] = index;
+        values[at] = value;
+        numMappings++;
+      }
+    }
+  }
+
+  @Override
+  public int hashCode() {
+    int result = 0;
+    for (int i = 0; i < numMappings; i++) {
+      result = 31 * result + indices[i];
+      result = 31 * result + (int) Double.doubleToRawLongBits(values[i]);
+    }
+    return result;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (o instanceof OrderedIntDoubleMapping) {
+      OrderedIntDoubleMapping other = (OrderedIntDoubleMapping) o;
+      if (numMappings == other.getNumMappings()) {
+        for (int i = 0; i < numMappings; i++) {
+          if (indices[i] != other.getIndices()[i] || values[i] != other.getValues()[i]) {
+            return false;
+          }
+        }
+        return true;
+      }
+    }
+    return false;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder result = new StringBuilder(10 * numMappings);
+    for (int i = 0; i < numMappings; i++) {
+      result.append('(');
+      result.append(indices[i]);
+      result.append(',');
+      result.append(values[i]);
+      result.append(')');
+    }
+    return result.toString();
+  }
+
+  @Override
+  public OrderedIntDoubleMapping clone() {
+    return new OrderedIntDoubleMapping(indices.clone(), values.clone(), numMappings);
+  }
+
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/Partitioning.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/Partitioning.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/Partitioning.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/Partitioning.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,1263 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math;
+
+import org.apache.mahout.math.function.IntComparator;
+import org.apache.mahout.math.list.DoubleArrayList;
+import org.apache.mahout.math.list.IntArrayList;
+
+import java.util.Comparator;
+/**
+ * Given some interval boundaries, partitions arrays such that all elements falling into an interval are placed next to each other.
+ * <p>
+ * The algorithms partition arrays into two or more intervals. 
+ * They distinguish between <i>synchronously</i> partitioning either one, two or three arrays.
+ * They further come in templated versions, either partitioning <tt>int[]</tt> arrays or <tt>double[]</tt> arrays.
+ * <p>
+ * You may want to start out reading about the simplest case: Partitioning one <tt>int[]</tt> array into two intervals.
+ * To do so, read {@link #partition(int[],int,int,int)}.
+ *
+ * Next, building upon that foundation comes a method partitioning <tt>int[]</tt> arrays into multiple intervals.
+ * See {@link #partition(int[],int,int,int[],int,int,int[])} for related documentation.
+ * <p>
+ * All other methods are no different than the one's you now already understand, except that they operate on slightly different data types.
+ * <p>
+ * <b>Performance</b>
+ * <p>
+ * Partitioning into two intervals is <tt>O( N )</tt>.
+ * Partitioning into k intervals is <tt>O( N * log(k))</tt>.
+ * Constants factors are minimized.
+ * No temporary memory is allocated; Partitioning is in-place.
+ *
+ * @see org.apache.mahout.math.matrix.doublealgo.Partitioning
+ *
+ */
+
+/** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
+@Deprecated
+public class Partitioning {
+
+  private static final int SMALL = 7;
+  private static final int MEDIUM = 40;
+
+  // benchmark only
+  protected static int steps = 0;
+  //public static int swappedElements = 0;
+
+  /** Makes this class non instantiable, but still let's others inherit from it. */
+  private Partitioning() {
+  }
+
+  /**
+   * Finds the given key "a" within some generic data using the binary search algorithm.
+   *
+   * @param a    the index of the key to search for.
+   * @param from the leftmost search position, inclusive.
+   * @param to   the rightmost search position, inclusive.
+   * @param comp the comparator determining the order of the generic data. Takes as first argument the index <tt>a</tt>
+   *             within the generic splitters <tt>s</tt>. Takes as second argument the index <tt>b</tt> within the
+   *             generic data <tt>g</tt>.
+   * @return index of the search key, if it is contained in the list; otherwise, <tt>(-(<i>insertion point</i>) -
+   *         1)</tt>. The <i>insertion point</i> is defined as the the point at which the value would be inserted into
+   *         the list: the index of the first element greater than the key, or <tt>list.length</tt>, if all elements in
+   *         the list are less than the specified key.  Note that this guarantees that the return value will be &gt;= 0
+   *         if and only if the key is found.
+   */
+  private static int binarySearchFromTo(int a, int from, int to, IntComparator comp) {
+    while (from <= to) {
+      int mid = (from + to) / 2;
+      int comparison = comp.compare(mid, a);
+      if (comparison < 0) {
+        from = mid + 1;
+      } else if (comparison > 0) {
+        to = mid - 1;
+      } else {
+        return mid;
+      } // key found
+    }
+    return -(from + 1);  // key not found.
+  }
+
+  /**
+   * Same as {@link #dualPartition(int[],int[],int,int,int[],int,int,int[])} except that it <i>synchronously</i>
+   * partitions <tt>double[]</tt> rather than <tt>int[]</tt> arrays.
+   */
+  public static void dualPartition(double[] list, double[] secondary, int from, int to, double[] splitters,
+                                   int splitFrom, int splitTo, int[] splitIndexes) {
+
+    if (splitFrom > splitTo) {
+      return;
+    } // nothing to do
+    if (from > to) { // all bins are empty
+      from--;
+      for (int i = splitFrom; i <= splitTo;) {
+        splitIndexes[i++] = from;
+      }
+      return;
+    }
+
+    // Choose a partition (pivot) index, m
+    // Ideally, the pivot should be the median, because a median splits a list into two equal sized sublists.
+    // However, computing the median is expensive, so we use an approximation.
+    int medianIndex;
+    if (splitFrom == splitTo) { // we don't really have a choice
+      medianIndex = splitFrom;
+    } else { // we do have a choice
+      int m = (from + to) / 2;       // Small arrays, middle element
+      int len = to - from + 1;
+      if (len > SMALL) {
+        int l = from;
+        int n = to;
+        if (len > MEDIUM) {        // Big arrays, pseudomedian of 9
+          int s = len / 8;
+          l = med3(list, l, l + s, l + 2 * s);
+          m = med3(list, m - s, m, m + s);
+          n = med3(list, n - 2 * s, n - s, n);
+        }
+        m = med3(list, l, m, n); // Mid-size, pseudomedian of 3
+      }
+
+      // Find the splitter closest to the pivot, i.e. the splitter that best splits the list into two equal sized sublists.
+      medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo);
+      if (medianIndex < 0) {
+        medianIndex = -medianIndex - 1;
+      } // not found
+      if (medianIndex > splitTo) {
+        medianIndex = splitTo;
+      } // not found, one past the end
+
+    }
+    double splitter = splitters[medianIndex]; // int, double --> template type dependent
+
+    // Partition the list according to the splitter, i.e.
+    // Establish invariant: list[i] < splitter <= list[j] for i=from..medianIndex and j=medianIndex+1 .. to
+    int splitIndex = dualPartition(list, secondary, from, to, splitter);
+    splitIndexes[medianIndex] = splitIndex;
+
+    // Optimization: Handle special cases to cut down recursions.
+    if (splitIndex < from) { // no element falls into this bin
+      // all bins with splitters[i] <= splitter are empty
+      int i = medianIndex - 1;
+      while (i >= splitFrom && (!(splitter < splitters[i]))) {
+        splitIndexes[i--] = splitIndex;
+      }
+      splitFrom = medianIndex + 1;
+    } else if (splitIndex >= to) { // all elements fall into this bin
+      // all bins with splitters[i] >= splitter are empty
+      int i = medianIndex + 1;
+      while (i <= splitTo && (!(splitter > splitters[i]))) {
+        splitIndexes[i++] = splitIndex;
+      }
+      splitTo = medianIndex - 1;
+    }
+
+    // recursively partition left half
+    if (splitFrom <= medianIndex - 1) {
+      dualPartition(list, secondary, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes);
+    }
+
+    // recursively partition right half
+    if (medianIndex + 1 <= splitTo) {
+      dualPartition(list, secondary, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes);
+    }
+  }
+
+  /**
+   * Same as {@link #dualPartition(int[],int[],int,int,int)} except that it <i>synchronously</i> partitions
+   * <tt>double[]</tt> rather than <tt>int[]</tt> arrays.
+   */
+  public static int dualPartition(double[] list, double[] secondary, int from, int to, double splitter) {
+    for (int i = from - 1; ++i <= to;) {
+      double element = list[i];  // int, double --> template type dependent
+      if (element < splitter) {
+        // swap x[i] with x[from]
+        list[i] = list[from];
+        list[from] = element;
+
+        element = secondary[i];
+        secondary[i] = secondary[from];
+        secondary[from++] = element;
+      }
+    }
+    return from - 1;
+  }
+
+  /**
+   * Same as {@link #partition(int[],int,int,int[],int,int,int[])} except that this method <i>synchronously</i>
+   * partitions two arrays at the same time; both arrays are partially sorted according to the elements of the primary
+   * array. In other words, each time an element in the primary array is moved from index A to B, the correspoding
+   * element within the secondary array is also moved from index A to B. <p> <b>Use cases:</b> <p> Image having a large
+   * list of 2-dimensional points. If memory consumption and performance matter, it is a good idea to physically lay
+   * them out as two 1-dimensional arrays (using something like <tt>Point2D</tt> objects would be prohibitively
+   * expensive, both in terms of time and space). Now imagine wanting to histogram the points. We may want to partially
+   * sort the points by x-coordinate into intervals. This method efficiently does the job. <p> <b>Performance:</b> <p>
+   * Same as for single-partition methods.
+   */
+  public static void dualPartition(int[] list, int[] secondary, int from, int to, int[] splitters, int splitFrom,
+                                   int splitTo, int[] splitIndexes) {
+
+    if (splitFrom > splitTo) {
+      return;
+    } // nothing to do
+    if (from > to) { // all bins are empty
+      from--;
+      for (int i = splitFrom; i <= splitTo;) {
+        splitIndexes[i++] = from;
+      }
+      return;
+    }
+
+    // Choose a partition (pivot) index, m
+    // Ideally, the pivot should be the median, because a median splits a list into two equal sized sublists.
+    // However, computing the median is expensive, so we use an approximation.
+    int medianIndex;
+    if (splitFrom == splitTo) { // we don't really have a choice
+      medianIndex = splitFrom;
+    } else { // we do have a choice
+      int m = (from + to) / 2;       // Small arrays, middle element
+      int len = to - from + 1;
+      if (len > SMALL) {
+        int l = from;
+        int n = to;
+        if (len > MEDIUM) {        // Big arrays, pseudomedian of 9
+          int s = len / 8;
+          l = med3(list, l, l + s, l + 2 * s);
+          m = med3(list, m - s, m, m + s);
+          n = med3(list, n - 2 * s, n - s, n);
+        }
+        m = med3(list, l, m, n); // Mid-size, pseudomedian of 3
+      }
+
+      // Find the splitter closest to the pivot, i.e. the splitter that best splits the list into two equal sized sublists.
+      medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo);
+      if (medianIndex < 0) {
+        medianIndex = -medianIndex - 1;
+      } // not found
+      if (medianIndex > splitTo) {
+        medianIndex = splitTo;
+      } // not found, one past the end
+
+    }
+    int splitter = splitters[medianIndex]; // int, double --> template type dependent
+
+    // Partition the list according to the splitter, i.e.
+    // Establish invariant: list[i] < splitter <= list[j] for i=from..medianIndex and j=medianIndex+1 .. to
+    int splitIndex = dualPartition(list, secondary, from, to, splitter);
+    splitIndexes[medianIndex] = splitIndex;
+
+    // Optimization: Handle special cases to cut down recursions.
+    if (splitIndex < from) { // no element falls into this bin
+      // all bins with splitters[i] <= splitter are empty
+      int i = medianIndex - 1;
+      while (i >= splitFrom && (!(splitter < splitters[i]))) {
+        splitIndexes[i--] = splitIndex;
+      }
+      splitFrom = medianIndex + 1;
+    } else if (splitIndex >= to) { // all elements fall into this bin
+      // all bins with splitters[i] >= splitter are empty
+      int i = medianIndex + 1;
+      while (i <= splitTo && (!(splitter > splitters[i]))) {
+        splitIndexes[i++] = splitIndex;
+      }
+      splitTo = medianIndex - 1;
+    }
+
+    // recursively partition left half
+    if (splitFrom <= medianIndex - 1) {
+      dualPartition(list, secondary, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes);
+    }
+
+    // recursively partition right half
+    if (medianIndex + 1 <= splitTo) {
+      dualPartition(list, secondary, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes);
+    }
+  }
+
+  /**
+   * Same as {@link #partition(int[],int,int,int)} except that this method <i>synchronously</i> partitions two arrays at
+   * the same time; both arrays are partially sorted according to the elements of the primary array. In other words,
+   * each time an element in the primary array is moved from index A to B, the correspoding element within the secondary
+   * array is also moved from index A to B. <p> <b>Performance:</b> <p> Same as for single-partition methods.
+   */
+  public static int dualPartition(int[] list, int[] secondary, int from, int to, int splitter) {
+    for (int i = from - 1; ++i <= to;) {
+      int element = list[i];  // int, double --> template type dependent
+      if (element < splitter) {
+        // swap x[i] with x[from]
+        list[i] = list[from];
+        list[from] = element;
+
+        element = secondary[i];
+        secondary[i] = secondary[from];
+        secondary[from++] = element;
+      }
+    }
+    return from - 1;
+  }
+
+  /**
+   * Same as {@link #partition(int[],int,int,int[],int,int,int[])} except that it <i>generically</i> partitions
+   * arbitrary shaped data (for example matrices or multiple arrays) rather than <tt>int[]</tt> arrays. <p> This method
+   * operates on arbitrary shaped data and arbitrary shaped splitters. In fact, it has no idea what kind of data by what
+   * kind of splitters it is partitioning. Comparisons and swapping are delegated to user provided objects which know
+   * their data and can do the job. <p> Lets call the generic data <tt>g</tt> (it may be a matrix, one array, three
+   * linked lists or whatever). Lets call the generic splitters <tt>s</tt>. This class takes a user comparison function
+   * operating on two indexes <tt>(a,b)</tt>, namely an {@link IntComparator}. The comparison function determines
+   * whether <tt>s[a]</tt> is equal, less or greater than <tt>g[b]</tt>. This method can then decide to swap the data
+   * <tt>g[b]</tt> with the data <tt>g[c]</tt> (yes, <tt>c</tt>, not <tt>a</tt>). It calls a user provided {@link
+   * org.apache.mahout.math.Swapper} object that knows how to swap the data of these two indexes. <p> Again, note the
+   * details: Comparisons compare <tt>s[a]</tt> with <tt>g[b]</tt>. Swaps swap <tt>g[b]</tt> with <tt>g[c]</tt>. Prior
+   * to calling this method, the generic splitters <tt>s</tt> must be sorted ascending and must not contain multiple
+   * equal values. These preconditions are not checked; be sure that they are met.
+   *
+   * @param from         the index of the first element within <tt>g</tt> to be considered.
+   * @param to           the index of the last element within <tt>g</tt> to be considered. The method considers the
+   *                     elements <tt>g[from] .. g[to]</tt>.
+   * @param splitFrom    the index of the first splitter element to be considered.
+   * @param splitTo      the index of the last splitter element to be considered. The method considers the splitter
+   *                     elements <tt>s[splitFrom] .. s[splitTo]</tt>.
+   * @param splitIndexes a list into which this method fills the indexes of elements delimiting intervals. Upon return
+   *                     <tt>splitIndexes[splitFrom..splitTo]</tt> will be set accordingly. Therefore, must satisfy
+   *                     <tt>splitIndexes.length > splitTo</tt>.
+   * @param comp         the comparator comparing a splitter with an element of the generic data. Takes as first
+   *                     argument the index <tt>a</tt> within the generic splitters <tt>s</tt>. Takes as second argument
+   *                     the index <tt>b</tt> within the generic data <tt>g</tt>.
+   * @param comp2        the comparator to determine the order of the generic data. Takes as first argument the index
+   *                     <tt>a</tt> within the generic data <tt>g</tt>. Takes as second argument the index <tt>b</tt>
+   *                     within the generic data <tt>g</tt>.
+   * @param comp3        the comparator comparing a splitter with another splitter. Takes as first argument the index
+   *                     <tt>a</tt> within the generic splitters <tt>s</tt>. Takes as second argument the index
+   *                     <tt>b</tt> within the generic splitters <tt>g</tt>.
+   * @param swapper      an object that knows how to swap the elements at any two indexes (a,b). Takes as first argument
+   *                     the index <tt>b</tt> within the generic data <tt>g</tt>. Takes as second argument the index
+   *                     <tt>c</tt> within the generic data <tt>g</tt>.
+   *
+   *                     <p> Tip: Normally you will have <tt>splitIndexes.length == s.length</tt> as well as
+   *                     <tt>from==0, to==g.length-1</tt> and <tt>splitFrom==0, splitTo==s.length-1</tt>.
+   * @see Sorting#binarySearchFromTo(int,int,IntComparator)
+   */
+  public static void genericPartition(int from, int to, int splitFrom, int splitTo, int[] splitIndexes,
+                                      IntComparator comp, IntComparator comp2, IntComparator comp3, Swapper swapper) {
+
+    if (splitFrom > splitTo) {
+      return;
+    } // nothing to do
+    if (from > to) { // all bins are empty
+      from--;
+      for (int i = splitFrom; i <= splitTo;) {
+        splitIndexes[i++] = from;
+      }
+      return;
+    }
+
+    // Choose a partition (pivot) index, m
+    // Ideally, the pivot should be the median, because a median splits a list into two equal sized sublists.
+    // However, computing the median is expensive, so we use an approximation.
+    int medianIndex;
+    if (splitFrom == splitTo) { // we don't really have a choice
+      medianIndex = splitFrom;
+    } else { // we do have a choice
+      int m = (from + to) / 2;       // Small arrays, middle element
+      int len = to - from + 1;
+      if (len > SMALL) {
+        int l = from;
+        int n = to;
+        if (len > MEDIUM) {        // Big arrays, pseudomedian of 9
+          int s = len / 8;
+          l = med3(l, l + s, l + 2 * s, comp2);
+          m = med3(m - s, m, m + s, comp2);
+          n = med3(n - 2 * s, n - s, n, comp2);
+        }
+        m = med3(l, m, n, comp2); // Mid-size, pseudomedian of 3
+      }
+
+      // Find the splitter closest to the pivot, i.e. the splitter that best splits the list into two equal sized sublists.
+      medianIndex = binarySearchFromTo(m, splitFrom, splitTo, comp);
+      if (medianIndex < 0) {
+        medianIndex = -medianIndex - 1;
+      } // not found
+      if (medianIndex > splitTo) {
+        medianIndex = splitTo;
+      } // not found, one past the end
+
+    }
+    int splitter = medianIndex; // int, double --> template type dependent
+
+    // Partition the list according to the splitter, i.e.
+    // Establish invariant: list[i] < splitter <= list[j] for i=from..medianIndex and j=medianIndex+1 .. to
+    int splitIndex = genericPartition(from, to, splitter, comp, swapper);
+    splitIndexes[medianIndex] = splitIndex;
+
+
+    // Optimization: Handle special cases to cut down recursions.
+    if (splitIndex < from) { // no element falls into this bin
+      // all bins with splitters[i] <= splitter are empty
+      int i = medianIndex - 1;
+      while (i >= splitFrom && (!(comp3.compare(splitter, i) < 0))) {
+        splitIndexes[i--] = splitIndex;
+      }
+      splitFrom = medianIndex + 1;
+    } else if (splitIndex >= to) { // all elements fall into this bin
+      // all bins with splitters[i] >= splitter are empty
+      int i = medianIndex + 1;
+      while (i <= splitTo && (!(comp3.compare(splitter, i) > 0))) {
+        splitIndexes[i++] = splitIndex;
+      }
+      splitTo = medianIndex - 1;
+    }
+
+
+    // recursively partition left half
+    if (splitFrom <= medianIndex - 1) {
+      genericPartition(from, splitIndex, splitFrom, medianIndex - 1, splitIndexes, comp, comp2, comp3, swapper);
+    }
+
+    // recursively partition right half
+    if (medianIndex + 1 <= splitTo) {
+      genericPartition(splitIndex + 1, to, medianIndex + 1, splitTo, splitIndexes, comp, comp2, comp3, swapper);
+    }
+  }
+
+  /**
+   * Same as {@link #partition(int[],int,int,int)} except that it <i>generically</i> partitions arbitrary shaped data
+   * (for example matrices or multiple arrays) rather than <tt>int[]</tt> arrays.
+   */
+  private static int genericPartition(int from, int to, int splitter, IntComparator comp, Swapper swapper) {
+    for (int i = from - 1; ++i <= to;) {
+      if (comp.compare(splitter, i) > 0) {
+        // swap x[i] with x[from]
+        swapper.swap(i, from);
+        from++;
+      }
+    }
+    return from - 1;
+  }
+
+  /** Returns the index of the median of the three indexed elements. */
+  private static int med3(double[] x, int a, int b, int c) {
+    return (x[a] < x[b] ?
+        (x[b] < x[c] ? b : x[a] < x[c] ? c : a) :
+        (x[b] > x[c] ? b : x[a] > x[c] ? c : a));
+  }
+
+  /** Returns the index of the median of the three indexed elements. */
+  private static int med3(int[] x, int a, int b, int c) {
+    return (x[a] < x[b] ?
+        (x[b] < x[c] ? b : x[a] < x[c] ? c : a) :
+        (x[b] > x[c] ? b : x[a] > x[c] ? c : a));
+  }
+
+  /** Returns the index of the median of the three indexed chars. */
+  private static int med3(Object[] x, int a, int b, int c, Comparator<Object> comp) {
+    int ab = comp.compare(x[a], x[b]);
+    int ac = comp.compare(x[a], x[c]);
+    int bc = comp.compare(x[b], x[c]);
+    return (ab < 0 ?
+        (bc < 0 ? b : ac < 0 ? c : a) :
+        (bc > 0 ? b : ac > 0 ? c : a));
+  }
+
+  /** Returns the index of the median of the three indexed chars. */
+  private static int med3(int a, int b, int c, IntComparator comp) {
+    int ab = comp.compare(a, b);
+    int ac = comp.compare(a, c);
+    int bc = comp.compare(b, c);
+    return (ab < 0 ?
+        (bc < 0 ? b : ac < 0 ? c : a) :
+        (bc > 0 ? b : ac > 0 ? c : a));
+  }
+
+  /**
+   * Same as {@link #partition(int[],int,int,int[],int,int,int[])} except that it partitions <tt>double[]</tt> rather
+   * than <tt>int[]</tt> arrays.
+   */
+  public static void partition(double[] list, int from, int to, double[] splitters, int splitFrom, int splitTo,
+                               int[] splitIndexes) {
+
+    if (splitFrom > splitTo) {
+      return;
+    } // nothing to do
+    if (from > to) { // all bins are empty
+      from--;
+      for (int i = splitFrom; i <= splitTo;) {
+        splitIndexes[i++] = from;
+      }
+      return;
+    }
+
+    // Choose a partition (pivot) index, m
+    // Ideally, the pivot should be the median, because a median splits a list into two equal sized sublists.
+    // However, computing the median is expensive, so we use an approximation.
+    int medianIndex;
+    if (splitFrom == splitTo) { // we don't really have a choice
+      medianIndex = splitFrom;
+    } else { // we do have a choice
+      int m = (from + to) / 2;       // Small arrays, middle element
+      int len = to - from + 1;
+      if (len > SMALL) {
+        int l = from;
+        int n = to;
+        if (len > MEDIUM) {        // Big arrays, pseudomedian of 9
+          int s = len / 8;
+          l = med3(list, l, l + s, l + 2 * s);
+          m = med3(list, m - s, m, m + s);
+          n = med3(list, n - 2 * s, n - s, n);
+        }
+        m = med3(list, l, m, n); // Mid-size, pseudomedian of 3
+      }
+
+      // Find the splitter closest to the pivot, i.e. the splitter that best splits the list into two equal sized sublists.
+      medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo);
+      if (medianIndex < 0) {
+        medianIndex = -medianIndex - 1;
+      } // not found
+      if (medianIndex > splitTo) {
+        medianIndex = splitTo;
+      } // not found, one past the end
+
+    }
+    double splitter = splitters[medianIndex]; // int, double --> template type dependent
+
+    // Partition the list according to the splitter, i.e.
+    // Establish invariant: list[i] < splitter <= list[j] for i=from..medianIndex and j=medianIndex+1 .. to
+    int splitIndex = partition(list, from, to, splitter);
+    splitIndexes[medianIndex] = splitIndex;
+
+    // Optimization: Handle special cases to cut down recursions.
+    if (splitIndex < from) { // no element falls into this bin
+      // all bins with splitters[i] <= splitter are empty
+      int i = medianIndex - 1;
+      while (i >= splitFrom && (!(splitter < splitters[i]))) {
+        splitIndexes[i--] = splitIndex;
+      }
+      splitFrom = medianIndex + 1;
+    } else if (splitIndex >= to) { // all elements fall into this bin
+      // all bins with splitters[i] >= splitter are empty
+      int i = medianIndex + 1;
+      while (i <= splitTo && (!(splitter > splitters[i]))) {
+        splitIndexes[i++] = splitIndex;
+      }
+      splitTo = medianIndex - 1;
+    }
+
+    // recursively partition left half
+    if (splitFrom <= medianIndex - 1) {
+      partition(list, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes);
+    }
+
+    // recursively partition right half
+    if (medianIndex + 1 <= splitTo) {
+      partition(list, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes);
+    }
+  }
+
+  /**
+   * Same as {@link #partition(int[],int,int,int)} except that it partitions <tt>double[]</tt> rather than
+   * <tt>int[]</tt> arrays.
+   */
+  public static int partition(double[] list, int from, int to, double splitter) {
+    for (int i = from - 1; ++i <= to;) {
+      double element = list[i];  // int, double --> template type dependent
+      if (element < splitter) {
+        // swap x[i] with x[from]
+        list[i] = list[from];
+        list[from++] = element;
+      }
+    }
+    return from - 1;
+  }
+
+  /**
+   * Partitions (partially sorts) the given list such that all elements falling into some intervals are placed next to
+   * each other. Returns the indexes of elements delimiting intervals. <p> <b>Example:</b> <p> <tt>list = (7, 4, 5, 50,
+   * 6, 4, 3, 6), splitters = (5, 10, 30)</tt> defines the three intervals <tt>[-infinity,5), [5,10), [10,30)</tt>. Lets
+   * define to sort the entire list (<tt>from=0, to=7</tt>) using all splitters (<tt>splitFrom==0, splitTo=2</tt>). <p>
+   * The method modifies the list to be <tt>list = (4, 4, 3, 6, 7, 5, 6, 50)</tt> and returns the <tt>splitIndexes = (2,
+   * 6, 6)</tt>. In other words, <ul> <li>All values <tt>list[0..2]</tt> fall into <tt>[-infinity,5)</tt>. <li>All
+   * values <tt>list[3..6]</tt> fall into <tt>[5,10)</tt>. <li>All values <tt>list[7..6]</tt> fall into
+   * <tt>[10,30)</tt>, i.e. no elements, since <tt>7>6</tt>. <li>All values <tt>list[7 .. 7=list.length-1]</tt> fall
+   * into <tt>[30,infinity]</tt>. <li>In general, all values <tt>list[splitIndexes[j-1]+1 .. splitIndexes[j]]</tt> fall
+   * into interval <tt>j</tt>. </ul> As can be seen, the list is partially sorted such that values falling into a
+   * certain interval are placed next to each other. Note that <i>within</i> an interval, elements are entirelly
+   * unsorted. They are only sorted across interval boundaries. In particular, this partitioning algorithm is not
+   * <i>stable</i>: the relative order of elements is not preserved (Producing a stable algorithm would require no more
+   * than minor modifications to method partition(int[],int,int,int)). <p> More formally, this method guarantees that
+   * upon return <tt>for all j = splitFrom .. splitTo</tt> there holds: <br><tt>for all i = splitIndexes[j-1]+1 ..
+   * splitIndexes[j]: splitters[j-1] <= list[i] < splitters[j]</tt>. <p> <b>Performance:</b> <p> Let
+   * <tt>N=to-from+1</tt> be the number of elements to be partitioned. Let <tt>k=splitTo-splitFrom+1</tt> be the number
+   * of splitter elements. Then we have the following time complexities <ul> <li>Worst case:  <tt>O( N * log(k) )</tt>.
+   * <li>Average case: <tt>O( N * log(k) )</tt>. <li>Best case: <tt>O( N )</tt>. In general, the more uniform (skewed)
+   * the data is spread across intervals, the more performance approaches the worst (best) case. If no elements fall
+   * into the given intervals, running time is linear. </ul> No temporary memory is allocated; the sort is in-place. <p>
+   * <b>Implementation:</b> <p> The algorithm can be seen as a Bentley/McIlroy quicksort where swapping and insertion
+   * sort are omitted. It is designed to detect and take advantage of skew while maintaining good performance in the
+   * uniform case.
+   *
+   * @param list         the list to be partially sorted.
+   * @param from         the index of the first element within <tt>list</tt> to be considered.
+   * @param to           the index of the last element within <tt>list</tt> to be considered. The method considers the
+   *                     elements <tt>list[from] .. list[to]</tt>.
+   * @param splitters    the values at which the list shall be split into intervals. Must be sorted ascending and must
+   *                     not contain multiple identical values. These preconditions are not checked; be sure that they
+   *                     are met.
+   * @param splitFrom    the index of the first splitter element to be considered.
+   * @param splitTo      the index of the last splitter element to be considered. The method considers the splitter
+   *                     elements <tt>splitters[splitFrom] .. splitters[splitTo]</tt>.
+   * @param splitIndexes a list into which this method fills the indexes of elements delimiting intervals. Upon return
+   *                     <tt>splitIndexes[splitFrom..splitTo]</tt> will be set accordingly. Therefore, must satisfy
+   *                     <tt>splitIndexes.length > splitTo</tt>. <p> Tip: Normally you will have <tt>splitIndexes.length
+   *                     == splitters.length</tt> as well as <tt>from==0, to==list.length-1</tt> and <tt>splitFrom==0,
+   *                     splitTo==splitters.length-1</tt>.
+   * @see org.apache.mahout.math.Arrays
+   * @see org.apache.mahout.math.GenericSorting
+   * @see java.util.Arrays
+   */
+  public static void partition(int[] list, int from, int to, int[] splitters, int splitFrom, int splitTo,
+                               int[] splitIndexes) {
+    //int element;  // int, double --> template type dependent
+
+    if (splitFrom > splitTo) {
+      return;
+    } // nothing to do
+    if (from > to) { // all bins are empty
+      from--;
+      for (int i = splitFrom; i <= splitTo;) {
+        splitIndexes[i++] = from;
+      }
+      return;
+    }
+
+    // Choose a partition (pivot) index, m
+    // Ideally, the pivot should be the median, because a median splits a list into two equal sized sublists.
+    // However, computing the median is expensive, so we use an approximation.
+    int medianIndex;
+    if (splitFrom == splitTo) { // we don't really have a choice
+      medianIndex = splitFrom;
+    } else { // we do have a choice
+      int m = (from + to) / 2;       // Small arrays, middle element
+      int len = to - from + 1;
+      if (len > SMALL) {
+        int l = from;
+        int n = to;
+        if (len > MEDIUM) {        // Big arrays, pseudomedian of 9
+          int s = len / 8;
+          l = med3(list, l, l + s, l + 2 * s);
+          m = med3(list, m - s, m, m + s);
+          n = med3(list, n - 2 * s, n - s, n);
+        }
+        m = med3(list, l, m, n); // Mid-size, pseudomedian of 3
+      }
+
+      // Find the splitter closest to the pivot, i.e. the splitter that best splits the list into two equal sized sublists.
+      medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo);
+
+      //int key = list[m];
+      /*
+      if (splitTo-splitFrom+1 < 5) { // on short lists linear search is quicker
+        int i=splitFrom-1;
+        while (++i <= splitTo && list[i] < key);
+        if (i > splitTo || list[i] > key) i = -i-1; // not found
+        medianIndex = i;
+      }
+      */
+      //else {
+      /*
+
+        int low = splitFrom;
+        int high = splitTo;
+        int comparison;
+
+        int mid=0;
+        while (low <= high) {
+          mid = (low + high) / 2;
+          comparison = splitters[mid]-key;
+          if (comparison < 0) low = mid + 1;
+          else if (comparison > 0) high = mid - 1;
+          else break; //return mid; // key found
+        }
+        medianIndex = mid;
+        if (low > high) medianIndex = -(medianIndex + 1);  // key not found.
+      //}
+      */
+
+
+      if (medianIndex < 0) {
+        medianIndex = -medianIndex - 1;
+      } // not found
+      if (medianIndex > splitTo) {
+        medianIndex = splitTo;
+      } // not found, one past the end
+
+    }
+    int splitter = splitters[medianIndex];
+
+    //log.info("medianIndex="+medianIndex);
+    // Partition the list according to the splitter, i.e.
+    // Establish invariant: list[i] < splitter <= list[j] for i=from..medianIndex and j=medianIndex+1 .. to
+    // Could simply call:
+    int splitIndex = partition(list, from, to, splitter);
+    // but for speed the code is manually inlined.
+    /*
+    steps += to-from+1;
+    int head = from;
+    for (int i=from-1; ++i<=to; ) { // swap all elements < splitter to front
+      element = list[i];
+      if (element < splitter) {
+        list[i] = list[head];
+        list[head++] = element;
+        //swappedElements++;
+      }
+    }
+    int splitIndex = head-1;
+    */
+
+
+    //log.info("splitIndex="+splitIndex);
+    splitIndexes[medianIndex] = splitIndex;
+
+    //if (splitFrom == splitTo) return; // done
+
+    // Optimization: Handle special cases to cut down recursions.
+    if (splitIndex < from) { // no element falls into this bin
+      // all bins with splitters[i] <= splitter are empty
+      int i = medianIndex - 1;
+      while (i >= splitFrom && (!(splitter < splitters[i]))) {
+        splitIndexes[i--] = splitIndex;
+      }
+      splitFrom = medianIndex + 1;
+    } else if (splitIndex >= to) { // all elements fall into this bin
+      // all bins with splitters[i] >= splitter are empty
+      int i = medianIndex + 1;
+      while (i <= splitTo && (!(splitter > splitters[i]))) {
+        splitIndexes[i++] = splitIndex;
+      }
+      splitTo = medianIndex - 1;
+    }
+
+    // recursively partition left half
+    if (splitFrom <= medianIndex - 1) {
+      //log.info("1.recursive: from="+from+", to="+splitIndex+", splitFrom="+splitFrom+", splitTo="+(medianIndex-1));
+      partition(list, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes);
+    }
+
+    // recursively partition right half
+    if (medianIndex + 1 <= splitTo) {
+      //log.info("2.recursive: from="+(splitIndex+1)+", to="+to+", splitFrom="+(medianIndex+1)+", splitTo="+splitTo);
+      partition(list, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes);
+    }
+    //log.info("BACK TRACKING\n\n");
+  }
+
+  /**
+   * Partitions (partially sorts) the given list such that all elements falling into the given interval are placed next
+   * to each other. Returns the index of the element delimiting the interval. <p> <b>Example:</b> <p> <tt>list = (7, 4,
+   * 5, 50, 6, 4, 3, 6), splitter = 5</tt> defines the two intervals <tt>[-infinity,5), [5,+infinity]</tt>. <p> The
+   * method modifies the list to be <tt>list = (4, 4, 3, 50, 6, 7, 5, 6)</tt> and returns the split index <tt>2</tt>. In
+   * other words, <ul> <li>All values <tt>list[0..2]</tt> fall into <tt>[-infinity,5)</tt>. <li>All values
+   * <tt>list[3=2+1 .. 7=list.length-1]</tt> fall into <tt>[5,+infinity]</tt>. </ul> As can be seen, the list is
+   * partially sorted such that values falling into a certain interval are placed next to each other. Note that
+   * <i>within</i> an interval, elements are entirelly unsorted. They are only sorted across interval boundaries. In
+   * particular, this partitioning algorithm is not <i>stable</i>. <p> More formally, this method guarantees that upon
+   * return there holds: <ul> <li>for all <tt>i = from .. returnValue: list[i] < splitter</tt> and <li>for all <tt>i =
+   * returnValue+1 .. list.length-1: !(list[i] < splitter)</tt>. </ul> <p> <b>Performance:</b> <p> Let
+   * <tt>N=to-from+1</tt> be the number of elements to be partially sorted. Then the time complexity is <tt>O( N )</tt>.
+   * No temporary memory is allocated; the sort is in-place.
+   *
+   * <p>
+   *
+   * @param list     the list to be partially sorted.
+   * @param from     the index of the first element within <tt>list</tt> to be considered.
+   * @param to       the index of the last element within <tt>list</tt> to be considered. The method considers the
+   *                 elements <tt>list[from] .. list[to]</tt>.
+   * @param splitter the value at which the list shall be split.
+   * @return the index of the largest element falling into the interval <tt>[-infinity,splitter)</tt>, as seen after
+   *         partitioning.
+   */
+  public static int partition(int[] list, int from, int to, int splitter) {
+    steps += to - from + 1;
+
+    /*
+    log.info();
+    if (from<=to) {
+      log.info("SORT WORKING: from="+from+", to="+to+", splitter="+splitter);
+    }
+    else {
+      log.info("SORT WORKING: NOTHING TO DO.");
+    }
+    */
+
+
+    // returns index of last element < splitter
+
+
+    /*
+    for (int i=from-1; ++i<=to; ) {
+      if (list[i] < splitter) {
+        int element = list[i];
+        list[i] = list[from];
+        list[from++] = element;
+      }
+    }
+    */
+
+
+    for (int i = from - 1; ++i <= to;) {
+      int element = list[i];
+      if (element < splitter) {
+        // swap x[i] with x[from]
+        list[i] = list[from];
+        list[from++] = element;
+        //swappedElements++;
+      }
+    }
+    //if (from<=to) log.info("Swapped "+(head-from)+" elements");
+
+
+    /*
+    //JAL:
+    int first = from;
+    int last = to+1;
+    --first;
+    while (true) {
+      while (++first < last && list[first] < splitter);
+      while (first < --last && !(list[last] < splitter));
+      if (first >= last) return first-1;
+      int tmp = list[first];
+      list[first] = list[last];
+      list[last] = tmp;
+    }
+    */
+
+
+    /*
+    log.info("splitter="+splitter);
+    log.info("before="+new IntArrayList(list));
+    int head = from;
+    int trail = to;
+    int element;
+    while (head<=trail) {
+      head--;
+      while (++head < trail && list[head] < splitter);
+
+      trail++;
+      while (--trail > head && list[trail] >= splitter);
+
+      if (head != trail) {
+        element = list[head];
+        list[head] = list[trail];
+        list[trail] = element;
+      }
+      head++;
+      trail--;
+      log.info("after ="+new IntArrayList(list)+", head="+head);
+    }
+    */
+
+
+    /*
+    //log.info("splitter="+splitter);
+    //log.info("before="+new IntArrayList(list));
+    to++;
+    //int head = from;
+    int element;
+    //int oldHead;
+    while (--to >= from) {
+      element = list[to];
+      if (element < splitter) {
+        from--;
+        while (++from < to && list[from] < splitter);
+        //if (head != to) {
+          list[to] = list[from];
+          list[from++] = element;
+          //oldHead = list[head];
+          //list[head] = element;
+          //list[i] = oldHead;
+
+          //head++;
+        //}
+        //head++;
+      }
+      //log.info("after ="+new IntArrayList(list)+", head="+head);
+    }
+    */
+
+    /*
+    int i=from-1;
+    int head = from;
+    int trail = to;
+    while (++i <= trail) {
+      int element = list[i];
+      if (element < splitter) {
+        if (head == i) head++;
+        else {
+          // swap list[i] with list[from]
+          int oldHead = list[head];
+          int oldTrail = list[trail];
+          list[head++] = element;
+          list[i--] = oldTrail;
+          list[trail--] = oldHead;
+        }
+      }
+      //log.info(new IntArrayList(list));
+
+    }
+    */
+
+
+    return from - 1;
+    //return head-1;
+  }
+
+  /**
+   * Same as {@link #partition(int[],int,int,int[],int,int,int[])} except that it partitions <tt>Object[]</tt> rather
+   * than <tt>int[]</tt> arrays.
+   */
+  public static void partition(Object[] list, int from, int to, Object[] splitters, int splitFrom, int splitTo,
+                               int[] splitIndexes, Comparator<Object> comp) {
+
+    if (splitFrom > splitTo) {
+      return;
+    } // nothing to do
+    if (from > to) { // all bins are empty
+      from--;
+      for (int i = splitFrom; i <= splitTo;) {
+        splitIndexes[i++] = from;
+      }
+      return;
+    }
+
+    // Choose a partition (pivot) index, m
+    // Ideally, the pivot should be the median, because a median splits a list into two equal sized sublists.
+    // However, computing the median is expensive, so we use an approximation.
+    int medianIndex;
+    if (splitFrom == splitTo) { // we don't really have a choice
+      medianIndex = splitFrom;
+    } else { // we do have a choice
+      int m = (from + to) / 2;       // Small arrays, middle element
+      int len = to - from + 1;
+      if (len > SMALL) {
+        int l = from;
+        int n = to;
+        if (len > MEDIUM) {        // Big arrays, pseudomedian of 9
+          int s = len / 8;
+          l = med3(list, l, l + s, l + 2 * s, comp);
+          m = med3(list, m - s, m, m + s, comp);
+          n = med3(list, n - 2 * s, n - s, n, comp);
+        }
+        m = med3(list, l, m, n, comp); // Mid-size, pseudomedian of 3
+      }
+
+      // Find the splitter closest to the pivot, i.e. the splitter that best splits the list into two equal sized sublists.
+      medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo, comp);
+      if (medianIndex < 0) {
+        medianIndex = -medianIndex - 1;
+      } // not found
+      if (medianIndex > splitTo) {
+        medianIndex = splitTo;
+      } // not found, one past the end
+
+    }
+    Object splitter = splitters[medianIndex]; // int, double --> template type dependent
+
+    // Partition the list according to the splitter, i.e.
+    // Establish invariant: list[i] < splitter <= list[j] for i=from..medianIndex and j=medianIndex+1 .. to
+    int splitIndex = partition(list, from, to, splitter, comp);
+    splitIndexes[medianIndex] = splitIndex;
+
+    // Optimization: Handle special cases to cut down recursions.
+    if (splitIndex < from) { // no element falls into this bin
+      // all bins with splitters[i] <= splitter are empty
+      int i = medianIndex - 1;
+      while (i >= splitFrom && (!(comp.compare(splitter, splitters[i]) < 0))) {
+        splitIndexes[i--] = splitIndex;
+      }
+      splitFrom = medianIndex + 1;
+    } else if (splitIndex >= to) { // all elements fall into this bin
+      // all bins with splitters[i] >= splitter are empty
+      int i = medianIndex + 1;
+      while (i <= splitTo && (!(comp.compare(splitter, splitters[i]) > 0))) {
+        splitIndexes[i++] = splitIndex;
+      }
+      splitTo = medianIndex - 1;
+    }
+
+    // recursively partition left half
+    if (splitFrom <= medianIndex - 1) {
+      partition(list, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes, comp);
+    }
+
+    // recursively partition right half
+    if (medianIndex + 1 <= splitTo) {
+      partition(list, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes, comp);
+    }
+  }
+
+  /**
+   * Same as {@link #partition(int[],int,int,int)} except that it <i>synchronously</i> partitions the objects of the
+   * given list by the order of the given comparator.
+   */
+  public static int partition(Object[] list, int from, int to, Object splitter, Comparator<Object> comp) {
+    for (int i = from - 1; ++i <= to;) {
+      Object element = list[i];  // int, double --> template type dependent
+      if (comp.compare(element, splitter) < 0) {
+        // swap x[i] with x[from]
+        list[i] = list[from];
+        list[from] = element;
+        from++;
+      }
+    }
+    return from - 1;
+  }
+
+  /**
+   * Equivalent to <tt>partition(list.elements(), from, to, splitters.elements(), 0, splitters.size()-1,
+   * splitIndexes.elements())</tt>.
+   */
+  public static void partition(DoubleArrayList list, int from, int to, DoubleArrayList splitters,
+                               IntArrayList splitIndexes) {
+    partition(list.elements(), from, to, splitters.elements(), 0, splitters.size() - 1, splitIndexes.elements());
+  }
+
+  /**
+   * Equivalent to <tt>partition(list.elements(), from, to, splitters.elements(), 0, splitters.size()-1,
+   * splitIndexes.elements())</tt>.
+   */
+  public static void partition(IntArrayList list, int from, int to, IntArrayList splitters, IntArrayList splitIndexes) {
+    partition(list.elements(), from, to, splitters.elements(), 0, splitters.size() - 1, splitIndexes.elements());
+  }
+
+  /**
+   * Same as {@link #triplePartition(int[],int[],int[],int,int,int[],int,int,int[])} except that it <i>synchronously</i>
+   * partitions <tt>double[]</tt> rather than <tt>int[]</tt> arrays.
+   */
+  public static void triplePartition(double[] list, double[] secondary, double[] tertiary, int from, int to,
+                                     double[] splitters, int splitFrom, int splitTo, int[] splitIndexes) {
+
+    if (splitFrom > splitTo) {
+      return;
+    } // nothing to do
+    if (from > to) { // all bins are empty
+      from--;
+      for (int i = splitFrom; i <= splitTo;) {
+        splitIndexes[i++] = from;
+      }
+      return;
+    }
+
+    // Choose a partition (pivot) index, m
+    // Ideally, the pivot should be the median, because a median splits a list into two equal sized sublists.
+    // However, computing the median is expensive, so we use an approximation.
+    int medianIndex;
+    if (splitFrom == splitTo) { // we don't really have a choice
+      medianIndex = splitFrom;
+    } else { // we do have a choice
+      int m = (from + to) / 2;       // Small arrays, middle element
+      int len = to - from + 1;
+      if (len > SMALL) {
+        int l = from;
+        int n = to;
+        if (len > MEDIUM) {        // Big arrays, pseudomedian of 9
+          int s = len / 8;
+          l = med3(list, l, l + s, l + 2 * s);
+          m = med3(list, m - s, m, m + s);
+          n = med3(list, n - 2 * s, n - s, n);
+        }
+        m = med3(list, l, m, n); // Mid-size, pseudomedian of 3
+      }
+
+      // Find the splitter closest to the pivot, i.e. the splitter that best splits the list into two equal sized sublists.
+      medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo);
+      if (medianIndex < 0) {
+        medianIndex = -medianIndex - 1;
+      } // not found
+      if (medianIndex > splitTo) {
+        medianIndex = splitTo;
+      } // not found, one past the end
+
+    }
+    double splitter = splitters[medianIndex]; // int, double --> template type dependent
+
+    // Partition the list according to the splitter, i.e.
+    // Establish invariant: list[i] < splitter <= list[j] for i=from..medianIndex and j=medianIndex+1 .. to
+    int splitIndex = triplePartition(list, secondary, tertiary, from, to, splitter);
+    splitIndexes[medianIndex] = splitIndex;
+
+    // Optimization: Handle special cases to cut down recursions.
+    if (splitIndex < from) { // no element falls into this bin
+      // all bins with splitters[i] <= splitter are empty
+      int i = medianIndex - 1;
+      while (i >= splitFrom && (!(splitter < splitters[i]))) {
+        splitIndexes[i--] = splitIndex;
+      }
+      splitFrom = medianIndex + 1;
+    } else if (splitIndex >= to) { // all elements fall into this bin
+      // all bins with splitters[i] >= splitter are empty
+      int i = medianIndex + 1;
+      while (i <= splitTo && (!(splitter > splitters[i]))) {
+        splitIndexes[i++] = splitIndex;
+      }
+      splitTo = medianIndex - 1;
+    }
+
+    // recursively partition left half
+    if (splitFrom <= medianIndex - 1) {
+      triplePartition(list, secondary, tertiary, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes);
+    }
+
+    // recursively partition right half
+    if (medianIndex + 1 <= splitTo) {
+      triplePartition(list, secondary, tertiary, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes);
+    }
+  }
+
+  /**
+   * Same as {@link #triplePartition(int[],int[],int[],int,int,int)} except that it <i>synchronously</i> partitions
+   * <tt>double[]</tt> rather than <tt>int[]</tt> arrays.
+   */
+  public static int triplePartition(double[] list, double[] secondary, double[] tertiary, int from, int to,
+                                    double splitter) {
+    for (int i = from - 1; ++i <= to;) {
+      double element = list[i];  // int, double --> template type dependent
+      if (element < splitter) {
+        // swap x[i] with x[from]
+        list[i] = list[from];
+        list[from] = element;
+
+        element = secondary[i];
+        secondary[i] = secondary[from];
+        secondary[from] = element;
+
+        element = tertiary[i];
+        tertiary[i] = tertiary[from];
+        tertiary[from++] = element;
+      }
+    }
+
+    return from - 1;
+  }
+
+  /**
+   * Same as {@link #partition(int[],int,int,int[],int,int,int[])} except that this method <i>synchronously</i>
+   * partitions three arrays at the same time; all three arrays are partially sorted according to the elements of the
+   * primary array. In other words, each time an element in the primary array is moved from index A to B, the
+   * correspoding element within the secondary array as well as the corresponding element within the tertiary array are
+   * also moved from index A to B. <p> <b>Use cases:</b> <p> Image having a large list of 3-dimensional points. If
+   * memory consumption and performance matter, it is a good idea to physically lay them out as three 1-dimensional
+   * arrays (using something like <tt>Point3D</tt> objects would be prohibitively expensive, both in terms of time and
+   * space). Now imagine wanting to histogram the points. We may want to partially sort the points by x-coordinate into
+   * intervals. This method efficiently does the job. <p> <b>Performance:</b> <p> Same as for single-partition methods.
+   */
+  public static void triplePartition(int[] list, int[] secondary, int[] tertiary, int from, int to, int[] splitters,
+                                     int splitFrom, int splitTo, int[] splitIndexes) {
+
+    if (splitFrom > splitTo) {
+      return;
+    } // nothing to do
+    if (from > to) { // all bins are empty
+      from--;
+      for (int i = splitFrom; i <= splitTo;) {
+        splitIndexes[i++] = from;
+      }
+      return;
+    }
+
+    // Choose a partition (pivot) index, m
+    // Ideally, the pivot should be the median, because a median splits a list into two equal sized sublists.
+    // However, computing the median is expensive, so we use an approximation.
+    int medianIndex;
+    if (splitFrom == splitTo) { // we don't really have a choice
+      medianIndex = splitFrom;
+    } else { // we do have a choice
+      int m = (from + to) / 2;       // Small arrays, middle element
+      int len = to - from + 1;
+      if (len > SMALL) {
+        int l = from;
+        int n = to;
+        if (len > MEDIUM) {        // Big arrays, pseudomedian of 9
+          int s = len / 8;
+          l = med3(list, l, l + s, l + 2 * s);
+          m = med3(list, m - s, m, m + s);
+          n = med3(list, n - 2 * s, n - s, n);
+        }
+        m = med3(list, l, m, n); // Mid-size, pseudomedian of 3
+      }
+
+      // Find the splitter closest to the pivot, i.e. the splitter that best splits the list into two equal sized sublists.
+      medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo);
+      if (medianIndex < 0) {
+        medianIndex = -medianIndex - 1;
+      } // not found
+      if (medianIndex > splitTo) {
+        medianIndex = splitTo;
+      } // not found, one past the end
+
+    }
+    int splitter = splitters[medianIndex]; // int, double --> template type dependent
+
+    // Partition the list according to the splitter, i.e.
+    // Establish invariant: list[i] < splitter <= list[j] for i=from..medianIndex and j=medianIndex+1 .. to
+    int splitIndex = triplePartition(list, secondary, tertiary, from, to, splitter);
+    splitIndexes[medianIndex] = splitIndex;
+
+    // Optimization: Handle special cases to cut down recursions.
+    if (splitIndex < from) { // no element falls into this bin
+      // all bins with splitters[i] <= splitter are empty
+      int i = medianIndex - 1;
+      while (i >= splitFrom && (!(splitter < splitters[i]))) {
+        splitIndexes[i--] = splitIndex;
+      }
+      splitFrom = medianIndex + 1;
+    } else if (splitIndex >= to) { // all elements fall into this bin
+      // all bins with splitters[i] >= splitter are empty
+      int i = medianIndex + 1;
+      while (i <= splitTo && (!(splitter > splitters[i]))) {
+        splitIndexes[i++] = splitIndex;
+      }
+      splitTo = medianIndex - 1;
+    }
+
+    // recursively partition left half
+    if (splitFrom <= medianIndex - 1) {
+      triplePartition(list, secondary, tertiary, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes);
+    }
+
+    // recursively partition right half
+    if (medianIndex + 1 <= splitTo) {
+      triplePartition(list, secondary, tertiary, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes);
+    }
+  }
+
+  /**
+   * Same as {@link #partition(int[],int,int,int)} except that this method <i>synchronously</i> partitions three arrays
+   * at the same time; all three arrays are partially sorted according to the elements of the primary array. In other
+   * words, each time an element in the primary array is moved from index A to B, the correspoding element within the
+   * secondary array as well as the corresponding element within the tertiary array are also moved from index A to B.
+   * <p> <b>Performance:</b> <p> Same as for single-partition methods.
+   */
+  public static int triplePartition(int[] list, int[] secondary, int[] tertiary, int from, int to, int splitter) {
+    for (int i = from - 1; ++i <= to;) {
+      int element = list[i];  // int, double --> template type dependent
+      if (element < splitter) {
+        // swap x[i] with x[from]
+        list[i] = list[from];
+        list[from] = element;
+
+        element = secondary[i];
+        secondary[i] = secondary[from];
+        secondary[from] = element;
+
+        element = tertiary[i];
+        tertiary[i] = tertiary[from];
+        tertiary[from++] = element;
+      }
+    }
+
+    return from - 1;
+  }
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/Partitioning.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message