mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From apalu...@apache.org
Subject [15/51] [partial] mahout git commit: (nojira) add native-viennaCL module to codebase. closes apache/mahout#241
Date Wed, 08 Jun 2016 21:40:12 GMT
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/matrix_operations.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/matrix_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/matrix_operations.hpp
new file mode 100644
index 0000000..c9dec88
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/matrix_operations.hpp
@@ -0,0 +1,1303 @@
+#ifndef VIENNACL_LINALG_MATRIX_OPERATIONS_HPP_
+#define VIENNACL_LINALG_MATRIX_OPERATIONS_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/matrix_operations.hpp
+    @brief Implementations of dense matrix related operations including matrix-vector products.
+*/
+
+#include "viennacl/forwards.h"
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+#include "viennacl/vector_proxy.hpp"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/meta/enable_if.hpp"
+#include "viennacl/meta/predicate.hpp"
+#include "viennacl/meta/result_of.hpp"
+#include "viennacl/traits/size.hpp"
+#include "viennacl/traits/start.hpp"
+#include "viennacl/traits/handle.hpp"
+#include "viennacl/traits/stride.hpp"
+#include "viennacl/vector.hpp"
+#include "viennacl/linalg/host_based/matrix_operations.hpp"
+
+#ifdef VIENNACL_WITH_OPENCL
+  #include "viennacl/linalg/opencl/matrix_operations.hpp"
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+  #include "viennacl/linalg/cuda/matrix_operations.hpp"
+#endif
+
+namespace viennacl
+{
+  namespace linalg
+  {
+
+    template<typename DestNumericT, typename SrcNumericT>
+    void convert(matrix_base<DestNumericT> & dest, matrix_base<SrcNumericT> const & src)
+    {
+      assert(viennacl::traits::size1(dest) == viennacl::traits::size1(src) && bool("Incompatible matrix sizes in m1 = m2 (convert): size1(m1) != size1(m2)"));
+      assert(viennacl::traits::size2(dest) == viennacl::traits::size2(src) && bool("Incompatible matrix sizes in m1 = m2 (convert): size2(m1) != size2(m2)"));
+
+      switch (viennacl::traits::handle(dest).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::convert(dest, src);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::convert(dest, src);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::convert(dest, src);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+    template<typename NumericT,
+              typename SizeT, typename DistanceT>
+    void trans(const matrix_expression<const matrix_base<NumericT, SizeT, DistanceT>,const matrix_base<NumericT, SizeT, DistanceT>, op_trans> & proxy,
+              matrix_base<NumericT> & temp_trans)
+    {
+      switch (viennacl::traits::handle(proxy).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::trans(proxy, temp_trans);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::trans(proxy,temp_trans);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::trans(proxy,temp_trans);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+
+    template<typename NumericT,
+              typename ScalarType1>
+    void am(matrix_base<NumericT> & mat1,
+            matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
+    {
+      switch (viennacl::traits::handle(mat1).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+
+    template<typename NumericT,
+              typename ScalarType1, typename ScalarType2>
+    void ambm(matrix_base<NumericT> & mat1,
+              matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
+              matrix_base<NumericT> const & mat3, ScalarType2 const & beta,  vcl_size_t len_beta,  bool reciprocal_beta,  bool flip_sign_beta)
+    {
+      switch (viennacl::traits::handle(mat1).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::ambm(mat1,
+                                             mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
+                                             mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::ambm(mat1,
+                                         mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
+                                         mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::ambm(mat1,
+                                       mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
+                                       mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+
+    template<typename NumericT,
+              typename ScalarType1, typename ScalarType2>
+    void ambm_m(matrix_base<NumericT> & mat1,
+                matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
+                matrix_base<NumericT> const & mat3, ScalarType2 const & beta,  vcl_size_t len_beta,  bool reciprocal_beta,  bool flip_sign_beta)
+    {
+      switch (viennacl::traits::handle(mat1).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::ambm_m(mat1,
+                                               mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
+                                               mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::ambm_m(mat1,
+                                           mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
+                                           mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::ambm_m(mat1,
+                                         mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
+                                         mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+
+    template<typename NumericT>
+    void matrix_assign(matrix_base<NumericT> & mat, NumericT s, bool clear = false)
+    {
+      switch (viennacl::traits::handle(mat).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::matrix_assign(mat, s, clear);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::matrix_assign(mat, s, clear);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::matrix_assign(mat, s, clear);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+
+    template<typename NumericT>
+    void matrix_diagonal_assign(matrix_base<NumericT> & mat, NumericT s)
+    {
+      switch (viennacl::traits::handle(mat).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::matrix_diagonal_assign(mat, s);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::matrix_diagonal_assign(mat, s);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::matrix_diagonal_assign(mat, s);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+
+    /** @brief Dispatcher interface for A = diag(v, k) */
+    template<typename NumericT>
+    void matrix_diag_from_vector(const vector_base<NumericT> & v, int k, matrix_base<NumericT> & A)
+    {
+      switch (viennacl::traits::handle(v).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::matrix_diag_from_vector(v, k, A);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::matrix_diag_from_vector(v, k, A);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::matrix_diag_from_vector(v, k, A);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+    /** @brief Dispatcher interface for v = diag(A, k) */
+    template<typename NumericT>
+    void matrix_diag_to_vector(const matrix_base<NumericT> & A, int k, vector_base<NumericT> & v)
+    {
+      switch (viennacl::traits::handle(A).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::matrix_diag_to_vector(A, k, v);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::matrix_diag_to_vector(A, k, v);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::matrix_diag_to_vector(A, k, v);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+    template<typename NumericT>
+    void matrix_row(const matrix_base<NumericT> & A, unsigned int i, vector_base<NumericT> & v)
+    {
+      switch (viennacl::traits::handle(A).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::matrix_row(A, i, v);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::matrix_row(A, i, v);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::matrix_row(A, i, v);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+    template<typename NumericT>
+    void matrix_column(const matrix_base<NumericT> & A, unsigned int j, vector_base<NumericT> & v)
+    {
+      switch (viennacl::traits::handle(A).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::matrix_column(A, j, v);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::matrix_column(A, j, v);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::matrix_column(A, j, v);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+    /** @brief Computes the Frobenius norm of a matrix - dispatcher interface
+    *
+    * @param A      The matrix
+    * @param result The result scalar
+    *
+    * Note that if A is strided or off-set, then a copy will be created.
+    */
+    template<typename T>
+    void norm_frobenius_impl(matrix_base<T> const & A,
+                             scalar<T> & result)
+    {
+      typedef typename matrix_base<T>::handle_type  HandleType;
+
+      if ((A.start1() > 0) || (A.start2() > 0) || (A.stride1() > 1) || (A.stride2() > 1)) {
+        if (A.row_major()) {
+          viennacl::matrix<T, viennacl::row_major> temp_A(A);
+          viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1);
+          norm_2_impl(temp, result);
+        } else {
+          viennacl::matrix<T, viennacl::column_major> temp_A(A);
+          viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1);
+          norm_2_impl(temp, result);
+        }
+      } else {
+        viennacl::vector_base<T> temp(const_cast<HandleType &>(A.handle()), A.internal_size(), 0, 1);
+        norm_2_impl(temp, result);
+      }
+
+    }
+
+    /** @brief Computes the Frobenius norm of a vector with final reduction on the CPU
+    *
+    * @param A      The matrix
+    * @param result The result scalar
+    *
+    * Note that if A is strided or off-set, then a copy will be created.
+    */
+    template<typename T>
+    void norm_frobenius_cpu(matrix_base<T> const & A,
+                            T & result)
+    {
+      typedef typename matrix_base<T>::handle_type  HandleType;
+
+      if ((A.start1() > 0) || (A.start2() > 0) || (A.stride1() > 1) || (A.stride2() > 1)) {
+        if (A.row_major()) {
+          viennacl::matrix<T, viennacl::row_major> temp_A(A);
+          viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1);
+          norm_2_cpu(temp, result);
+        } else {
+          viennacl::matrix<T, viennacl::column_major> temp_A(A);
+          viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1);
+          norm_2_cpu(temp, result);
+        }
+      } else {
+        viennacl::vector_base<T> temp(const_cast<HandleType &>(A.handle()), A.internal_size(), 0, 1);
+        norm_2_cpu(temp, result);
+      }
+
+    }
+
+    //
+    /////////////////////////   matrix-vector products /////////////////////////////////
+    //
+
+
+
+    // A * x
+
+    /** @brief Carries out matrix-vector multiplication
+    *
+    * Implementation of the convenience expression result = prod(mat, vec);
+    *
+    * @param mat    The matrix
+    * @param vec    The vector
+    * @param result The result vector
+    */
+    template<typename NumericT>
+    void prod_impl(const matrix_base<NumericT> & mat,
+                   const vector_base<NumericT> & vec,
+                         vector_base<NumericT> & result)
+    {
+      assert( (viennacl::traits::size1(mat) == viennacl::traits::size(result)) && bool("Size check failed at v1 = prod(A, v2): size1(A) != size(v1)"));
+      assert( (viennacl::traits::size2(mat) == viennacl::traits::size(vec))    && bool("Size check failed at v1 = prod(A, v2): size2(A) != size(v2)"));
+
+      switch (viennacl::traits::handle(mat).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::prod_impl(mat, false, vec, result);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::prod_impl(mat, false, vec, result);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::prod_impl(mat, false, vec, result);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+
+    // trans(A) * x
+
+    /** @brief Carries out matrix-vector multiplication with a transposed matrix
+    *
+    * Implementation of the convenience expression result = trans(mat) * vec;
+    *
+    * @param mat_trans  The transposed matrix proxy
+    * @param vec        The vector
+    * @param result     The result vector
+    */
+    template<typename NumericT>
+    void prod_impl(const matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & mat_trans,
+                   const vector_base<NumericT> & vec,
+                         vector_base<NumericT> & result)
+    {
+      assert( (viennacl::traits::size1(mat_trans.lhs()) == viennacl::traits::size(vec))    && bool("Size check failed at v1 = trans(A) * v2: size1(A) != size(v2)"));
+      assert( (viennacl::traits::size2(mat_trans.lhs()) == viennacl::traits::size(result)) && bool("Size check failed at v1 = trans(A) * v2: size2(A) != size(v1)"));
+
+      switch (viennacl::traits::handle(mat_trans.lhs()).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::prod_impl(mat_trans.lhs(), true, vec, result);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::prod_impl(mat_trans.lhs(), true, vec, result);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::prod_impl(mat_trans.lhs(), true, vec, result);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+
+    //
+    /////////////////////////   matrix-matrix products /////////////////////////////////
+    //
+
+    /** @brief Carries out matrix-matrix multiplication
+    *
+    * Implementation of C = prod(A, B);
+    *
+    */
+    template<typename NumericT, typename ScalarType >
+    void prod_impl(const matrix_base<NumericT> & A,
+                   const matrix_base<NumericT> & B,
+                         matrix_base<NumericT> & C,
+                   ScalarType alpha,
+                   ScalarType beta)
+    {
+      assert( (viennacl::traits::size1(A) == viennacl::traits::size1(C)) && bool("Size check failed at C = prod(A, B): size1(A) != size1(C)"));
+      assert( (viennacl::traits::size2(A) == viennacl::traits::size1(B)) && bool("Size check failed at C = prod(A, B): size2(A) != size1(B)"));
+      assert( (viennacl::traits::size2(B) == viennacl::traits::size2(C)) && bool("Size check failed at C = prod(A, B): size2(B) != size2(C)"));
+
+
+      switch (viennacl::traits::handle(A).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::prod_impl(A, false, B, false, C, alpha, beta);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::prod_impl(A, false, B, false, C, alpha, beta);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::prod_impl(A, false, B, false, C, alpha, beta);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+
+
+    /** @brief Carries out matrix-matrix multiplication
+    *
+    * Implementation of C = prod(trans(A), B);
+    *
+    */
+    template<typename NumericT, typename ScalarType >
+    void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT>,
+                                                      const matrix_base<NumericT>,
+                                                      op_trans> & A,
+                   const matrix_base<NumericT> & B,
+                         matrix_base<NumericT> & C,
+                   ScalarType alpha,
+                   ScalarType beta)
+    {
+      assert(viennacl::traits::size2(A.lhs()) == viennacl::traits::size1(C) && bool("Size check failed at C = prod(trans(A), B): size2(A) != size1(C)"));
+      assert(viennacl::traits::size1(A.lhs()) == viennacl::traits::size1(B) && bool("Size check failed at C = prod(trans(A), B): size1(A) != size1(B)"));
+      assert(viennacl::traits::size2(B)       == viennacl::traits::size2(C) && bool("Size check failed at C = prod(trans(A), B): size2(B) != size2(C)"));
+
+      switch (viennacl::traits::handle(A.lhs()).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::prod_impl(A.lhs(), true, B, false, C, alpha, beta);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::prod_impl(A.lhs(), true, B, false, C, alpha, beta);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::prod_impl(A.lhs(), true, B, false, C, alpha, beta);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+
+
+
+    /** @brief Carries out matrix-matrix multiplication
+    *
+    * Implementation of C = prod(A, trans(B));
+    *
+    */
+    template<typename NumericT, typename ScalarType >
+    void prod_impl(const matrix_base<NumericT> & A,
+                   const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & B,
+                         matrix_base<NumericT> & C,
+                   ScalarType alpha,
+                   ScalarType beta)
+    {
+      assert(viennacl::traits::size1(A)       == viennacl::traits::size1(C)       && bool("Size check failed at C = prod(A, trans(B)): size1(A) != size1(C)"));
+      assert(viennacl::traits::size2(A)       == viennacl::traits::size2(B.lhs()) && bool("Size check failed at C = prod(A, trans(B)): size2(A) != size2(B)"));
+      assert(viennacl::traits::size1(B.lhs()) == viennacl::traits::size2(C)       && bool("Size check failed at C = prod(A, trans(B)): size1(B) != size2(C)"));
+
+      switch (viennacl::traits::handle(A).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::prod_impl(A, false, B.lhs(), true, C, alpha, beta);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::prod_impl(A, false, B.lhs(), true, C, alpha, beta);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::prod_impl(A, false, B.lhs(), true, C, alpha, beta);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+
+
+    /** @brief Carries out matrix-matrix multiplication
+    *
+    * Implementation of C = prod(trans(A), trans(B));
+    *
+    */
+    template<typename NumericT, typename ScalarType >
+    void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & A,
+                   const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & B,
+                   matrix_base<NumericT> & C,
+                   ScalarType alpha,
+                   ScalarType beta)
+    {
+      assert(viennacl::traits::size2(A.lhs()) == viennacl::traits::size1(C)       && bool("Size check failed at C = prod(trans(A), trans(B)): size2(A) != size1(C)"));
+      assert(viennacl::traits::size1(A.lhs()) == viennacl::traits::size2(B.lhs()) && bool("Size check failed at C = prod(trans(A), trans(B)): size1(A) != size2(B)"));
+      assert(viennacl::traits::size1(B.lhs()) == viennacl::traits::size2(C)       && bool("Size check failed at C = prod(trans(A), trans(B)): size1(B) != size2(C)"));
+
+      switch (viennacl::traits::handle(A.lhs()).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+
+    ///////////////////////// summation operations /////////////
+
+    template<typename NumericT>
+    void row_sum_impl(matrix_base<NumericT> const & A, vector_base<NumericT> & result)
+    {
+      viennacl::vector<NumericT> all_ones = viennacl::scalar_vector<NumericT>(A.size2(), NumericT(1), viennacl::traits::context(A));
+      viennacl::linalg::prod_impl(A, all_ones, result);
+    }
+
+    template<typename NumericT>
+    void column_sum_impl(matrix_base<NumericT> const & A, vector_base<NumericT> & result)
+    {
+      viennacl::vector<NumericT> all_ones = viennacl::scalar_vector<NumericT>(A.size1(), NumericT(1), viennacl::traits::context(A));
+      viennacl::linalg::prod_impl(matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>(A, A), all_ones, result);
+    }
+
+    ///////////////////////// Elementwise operations /////////////
+
+
+
+    /** @brief Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syntax). Don't use this function directly, use element_prod() and element_div().
+    *
+    * @param A      The result matrix (or -range, or -slice)
+    * @param proxy  The proxy object holding B, C, and the operation
+    */
+    template<typename T, typename OP>
+    void element_op(matrix_base<T> & A,
+                    matrix_expression<const matrix_base<T>, const matrix_base<T>, OP> const & proxy)
+    {
+      assert( (viennacl::traits::size1(A) == viennacl::traits::size1(proxy)) && bool("Size check failed at A = element_op(B): size1(A) != size1(B)"));
+      assert( (viennacl::traits::size2(A) == viennacl::traits::size2(proxy)) && bool("Size check failed at A = element_op(B): size2(A) != size2(B)"));
+
+      switch (viennacl::traits::handle(A).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::element_op(A, proxy);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::element_op(A, proxy);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::element_op(A, proxy);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+
+#define VIENNACL_MAKE_BINARY_OP(OPNAME)\
+    template<typename T>\
+    viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<op_##OPNAME> >\
+    element_##OPNAME(matrix_base<T> const & A, matrix_base<T> const & B)\
+    {\
+      return viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<op_##OPNAME> >(A, B);\
+    }\
+\
+    template<typename M1, typename M2, typename OP, typename T>\
+    viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP>,\
+                                const matrix_base<T>,\
+                                op_element_binary<op_##OPNAME> >\
+    element_##OPNAME(matrix_expression<const M1, const M2, OP> const & proxy, matrix_base<T> const & B)\
+    {\
+      return viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP>,\
+                                         const matrix_base<T>,\
+                                         op_element_binary<op_##OPNAME> >(proxy, B);\
+    }\
+\
+    template<typename T, typename M2, typename M3, typename OP>\
+    viennacl::matrix_expression<const matrix_base<T>,\
+                                const matrix_expression<const M2, const M3, OP>,\
+                                op_element_binary<op_##OPNAME> >\
+    element_##OPNAME(matrix_base<T> const & A, matrix_expression<const M2, const M3, OP> const & proxy)\
+    {\
+      return viennacl::matrix_expression<const matrix_base<T>,\
+                                         const matrix_expression<const M2, const M3, OP>,\
+                                         op_element_binary<op_##OPNAME> >(A, proxy);\
+    }\
+\
+    template<typename M1, typename M2, typename OP1,\
+              typename M3, typename M4, typename OP2>\
+    viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP1>,\
+                                const matrix_expression<const M3, const M4, OP2>,\
+                                op_element_binary<op_##OPNAME> >\
+    element_##OPNAME(matrix_expression<const M1, const M2, OP1> const & proxy1,\
+                 matrix_expression<const M3, const M4, OP2> const & proxy2)\
+    {\
+      return viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP1>,\
+                                         const matrix_expression<const M3, const M4, OP2>,\
+                                         op_element_binary<op_##OPNAME> >(proxy1, proxy2);\
+    }
+
+    VIENNACL_MAKE_BINARY_OP(prod)
+    VIENNACL_MAKE_BINARY_OP(div)
+    VIENNACL_MAKE_BINARY_OP(pow)
+
+    VIENNACL_MAKE_BINARY_OP(eq)
+    VIENNACL_MAKE_BINARY_OP(neq)
+    VIENNACL_MAKE_BINARY_OP(greater)
+    VIENNACL_MAKE_BINARY_OP(less)
+    VIENNACL_MAKE_BINARY_OP(geq)
+    VIENNACL_MAKE_BINARY_OP(leq)
+
+#undef VIENNACL_GENERATE_BINARY_OP_OVERLOADS
+
+
+
+#define VIENNACL_MAKE_UNARY_ELEMENT_OP(funcname) \
+    template<typename T> \
+    viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<op_##funcname> > \
+    element_##funcname(matrix_base<T> const & A) \
+    { \
+      return viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<op_##funcname> >(A, A); \
+    } \
+    template<typename LHS, typename RHS, typename OP> \
+    viennacl::matrix_expression<const matrix_expression<const LHS, const RHS, OP>, \
+                                const matrix_expression<const LHS, const RHS, OP>, \
+                                op_element_unary<op_##funcname> > \
+    element_##funcname(matrix_expression<const LHS, const RHS, OP> const & proxy) \
+    { \
+      return viennacl::matrix_expression<const matrix_expression<const LHS, const RHS, OP>, \
+                                         const matrix_expression<const LHS, const RHS, OP>, \
+                                         op_element_unary<op_##funcname> >(proxy, proxy); \
+    } \
+
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(abs)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(acos)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(asin)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(atan)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(ceil)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(cos)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(cosh)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(exp)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(fabs)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(floor)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(log)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(log10)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(sin)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(sinh)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(sqrt)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(tan)
+    VIENNACL_MAKE_UNARY_ELEMENT_OP(tanh)
+
+#undef VIENNACL_MAKE_UNARY_ELEMENT_OP
+
+
+    //
+    /////////////////////////   miscellaneous operations /////////////////////////////////
+    //
+
+
+    /** @brief Returns a proxy class for the operation mat += vec1 * vec2^T, i.e. a rank 1 update
+    *
+    * @param vec1    The first vector
+    * @param vec2    The second vector
+    */
+    template<typename NumericT>
+    viennacl::matrix_expression<const vector_base<NumericT>, const vector_base<NumericT>, op_prod>
+    outer_prod(const vector_base<NumericT> & vec1, const vector_base<NumericT> & vec2)
+    {
+      return viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>(vec1, vec2);
+    }
+
+
+    /** @brief The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update
+    *
+    * Implementation of the convenience expression result += alpha * outer_prod(vec1, vec2);
+    *
+    * @param mat1             The matrix to be updated
+    * @param alpha            The scaling factor (either a viennacl::scalar<>, float, or double)
+    * @param len_alpha        Length of the buffer for an eventual final reduction step (currently always '1')
+    * @param reciprocal_alpha Use 1/alpha instead of alpha
+    * @param flip_sign_alpha  Use -alpha instead of alpha
+    * @param vec1             The first vector
+    * @param vec2             The second vector
+    */
+    template<typename NumericT, typename S1>
+    void scaled_rank_1_update(matrix_base<NumericT> & mat1,
+                              S1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
+                              const vector_base<NumericT> & vec1,
+                              const vector_base<NumericT> & vec2)
+    {
+      switch (viennacl::traits::handle(mat1).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::scaled_rank_1_update(mat1,
+                                                             alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
+                                                             vec1, vec2);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::scaled_rank_1_update(mat1,
+                                                         alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
+                                                         vec1, vec2);
+          break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::scaled_rank_1_update(mat1,
+                                                       alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
+                                                       vec1, vec2);
+          break;
+#endif
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+    }
+
+    /** @brief This function stores the diagonal and the superdiagonal of a matrix in two vectors.
+    *
+    *
+    * @param A     The matrix from which the vectors will be extracted of.
+    * @param dh    The vector in which the diagonal of the matrix will be stored in.
+    * @param sh    The vector in which the superdiagonal of the matrix will be stored in.
+    */
+
+    template <typename NumericT, typename VectorType>
+    void bidiag_pack(matrix_base<NumericT> & A,
+                     VectorType & dh,
+                     VectorType & sh
+                    )
+    {
+      switch (viennacl::traits::handle(A).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::bidiag_pack(A, dh, sh);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::bidiag_pack(A, dh, sh);
+          break;
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::bidiag_pack(A, dh, sh);
+          break;
+#endif
+
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+
+
+    }
+    /** @brief This function copies a row or a column from a matrix to a vector.
+    *
+    *
+    * @param A          The matrix where to copy from.
+    * @param V          The vector to fill with data.
+    * @param row_start  The number of the first row to copy.
+    * @param col_start  The number of the first column to copy.
+    * @param copy_col   Set to TRUE to copy a column, FALSE to copy a row.
+    */
+
+    template <typename SCALARTYPE>
+    void copy_vec(matrix_base<SCALARTYPE>& A,
+                  vector_base<SCALARTYPE>& V,
+                  vcl_size_t row_start,
+                  vcl_size_t col_start,
+                  bool copy_col
+    )
+    {
+      switch (viennacl::traits::handle(A).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::copy_vec(A, V, row_start, col_start, copy_col);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+        case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::copy_vec(A, V, row_start, col_start, copy_col);
+          break;
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+        case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::copy_vec(A, V, row_start, col_start, copy_col);
+          break;
+#endif
+
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+      }
+
+    }
+
+    /** @brief This function applies a householder transformation to a matrix. A <- P * A with a householder reflection P
+    *
+    * @param A       The matrix to be updated.
+    * @param D       The normalized householder vector.
+    * @param start   The repetition counter.
+    */
+  template <typename NumericT>
+  void house_update_A_left(matrix_base<NumericT> & A,
+                           vector_base<NumericT>    & D,
+                           vcl_size_t start)
+  {
+    switch (viennacl::traits::handle(A).get_active_handle_id())
+    {
+      case viennacl::MAIN_MEMORY:
+        viennacl::linalg::host_based::house_update_A_left(A, D, start);
+        break;
+#ifdef VIENNACL_WITH_OPENCL
+      case viennacl::OPENCL_MEMORY:
+        viennacl::linalg::opencl::house_update_A_left(A, D, start);
+        break;
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+      case viennacl::CUDA_MEMORY:
+        viennacl::linalg::cuda::house_update_A_left(A, D, start);
+        break;
+#endif
+
+      case viennacl::MEMORY_NOT_INITIALIZED:
+        throw memory_exception("not initialised!");
+      default:
+        throw memory_exception("not implemented");
+    }
+  }
+
+
+  /** @brief This function applies a householder transformation to a matrix: A <- A * P with a householder reflection P
+  *
+  *
+  * @param A        The matrix to be updated.
+  * @param D        The normalized householder vector.
+  */
+
+  template <typename NumericT>
+  void house_update_A_right(matrix_base<NumericT>& A,
+                            vector_base<NumericT>   & D)
+  {
+    switch (viennacl::traits::handle(A).get_active_handle_id())
+    {
+      case viennacl::MAIN_MEMORY:
+        viennacl::linalg::host_based::house_update_A_right(A, D);
+        break;
+#ifdef VIENNACL_WITH_OPENCL
+      case viennacl::OPENCL_MEMORY:
+        viennacl::linalg::opencl::house_update_A_right(A, D);
+        break;
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+      case viennacl::CUDA_MEMORY:
+        viennacl::linalg::cuda::house_update_A_right(A, D);
+        break;
+#endif
+
+      case viennacl::MEMORY_NOT_INITIALIZED:
+        throw memory_exception("not initialised!");
+      default:
+        throw memory_exception("not implemented");
+    }
+  }
+
+  /** @brief This function updates the matrix Q, which is needed for the computation of the eigenvectors.
+  *
+  * @param Q        The matrix to be updated.
+  * @param D        The householder vector.
+  * @param A_size1  size1 of matrix A
+  */
+
+  template <typename NumericT>
+  void house_update_QL(matrix_base<NumericT> & Q,
+                       vector_base<NumericT>    & D,
+                       vcl_size_t A_size1)
+  {
+    switch (viennacl::traits::handle(Q).get_active_handle_id())
+    {
+      case viennacl::MAIN_MEMORY:
+        viennacl::linalg::host_based::house_update_QL(Q, D, A_size1);
+        break;
+#ifdef VIENNACL_WITH_OPENCL
+      case viennacl::OPENCL_MEMORY:
+        viennacl::linalg::opencl::house_update_QL(Q, D, A_size1);
+        break;
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+      case viennacl::CUDA_MEMORY:
+        viennacl::linalg::cuda::house_update_QL(Q, D, A_size1);
+        break;
+#endif
+
+      case viennacl::MEMORY_NOT_INITIALIZED:
+        throw memory_exception("not initialised!");
+      default:
+        throw memory_exception("not implemented");
+    }
+  }
+
+
+  /** @brief This function updates the matrix Q. It is part of the tql2 algorithm.
+  *
+  *
+  * @param Q       The matrix to be updated.
+  * @param tmp1    Vector with data from the tql2 algorithm.
+  * @param tmp2    Vector with data from the tql2 algorithm.
+  * @param l       Data from the tql2 algorithm.
+  * @param m       Data from the tql2 algorithm.
+  */
+  template<typename NumericT>
+  void givens_next(matrix_base<NumericT> & Q,
+                   vector_base<NumericT> & tmp1,
+                   vector_base<NumericT> & tmp2,
+                   int l,
+                   int m
+                )
+  {
+    switch (viennacl::traits::handle(Q).get_active_handle_id())
+    {
+      case viennacl::MAIN_MEMORY:
+        viennacl::linalg::host_based::givens_next(Q, tmp1, tmp2, l, m);
+        break;
+#ifdef VIENNACL_WITH_OPENCL
+      case viennacl::OPENCL_MEMORY:
+        viennacl::linalg::opencl::givens_next(Q, tmp1, tmp2, l, m);
+        break;
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+      case viennacl::CUDA_MEMORY:
+        viennacl::linalg::cuda::givens_next(Q, tmp1, tmp2, l, m);
+        break;
+#endif
+
+      case viennacl::MEMORY_NOT_INITIALIZED:
+        throw memory_exception("not initialised!");
+      default:
+        throw memory_exception("not implemented");
+    }
+  }
+
+  } //namespace linalg
+
+
+
+
+  //
+  /////////////////////////  Operator overloads /////////////////////////////////
+  //
+
+
+  //v += A * x
+  /** @brief Implementation of the operation v1 += A * v2, where A is a matrix
+  *
+  * @param v1     The result vector v1 where A * v2 is added to
+  * @param proxy  An expression template proxy class.
+  */
+  template<typename NumericT>
+  vector<NumericT>
+  operator+=(vector_base<NumericT> & v1,
+             const viennacl::vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, viennacl::op_prod> & proxy)
+  {
+    assert(viennacl::traits::size1(proxy.lhs()) == v1.size() && bool("Size check failed for v1 += A * v2: size1(A) != size(v1)"));
+
+    vector<NumericT> result(viennacl::traits::size1(proxy.lhs()));
+    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
+    v1 += result;
+    return v1;
+  }
+
+  /** @brief Implementation of the operation v1 -= A * v2, where A is a matrix
+  *
+  * @param v1     The result vector v1 where A * v2 is subtracted from
+  * @param proxy  An expression template proxy class.
+  */
+  template<typename NumericT>
+  vector<NumericT>
+  operator-=(vector_base<NumericT> & v1,
+             const viennacl::vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, viennacl::op_prod> & proxy)
+  {
+    assert(viennacl::traits::size1(proxy.lhs()) == v1.size() && bool("Size check failed for v1 -= A * v2: size1(A) != size(v1)"));
+
+    vector<NumericT> result(viennacl::traits::size1(proxy.lhs()));
+    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
+    v1 -= result;
+    return v1;
+  }
+
+
+
+
+
+  //free functions:
+  /** @brief Implementation of the operation 'result = v1 + A * v2', where A is a matrix
+  *
+  * @param v1     The addend vector.
+  * @param proxy  An expression template proxy class.
+  */
+  template<typename NumericT>
+  viennacl::vector<NumericT>
+  operator+(const vector_base<NumericT> & v1,
+            const vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
+  {
+    assert(viennacl::traits::size1(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed for v1 + A * v2: size1(A) != size(v1)"));
+
+    vector<NumericT> result(viennacl::traits::size(v1));
+    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
+    result += v1;
+    return result;
+  }
+
+  /** @brief Implementation of the operation 'result = v1 - A * v2', where A is a matrix
+  *
+  * @param v1     The addend vector.
+  * @param proxy  An expression template proxy class.
+  */
+  template<typename NumericT>
+  viennacl::vector<NumericT>
+  operator-(const vector_base<NumericT> & v1,
+            const vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
+  {
+    assert(viennacl::traits::size1(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed for v1 - A * v2: size1(A) != size(v1)"));
+
+    vector<NumericT> result(viennacl::traits::size(v1));
+    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
+    result = v1 - result;
+    return result;
+  }
+
+
+  ////////// transposed_matrix_proxy
+
+
+  //v += A^T * x
+  /** @brief Implementation of the operation v1 += A * v2, where A is a matrix
+  *
+  * @param v1     The addend vector where the result is written to.
+  * @param proxy  An expression template proxy class.
+  */
+  template<typename NumericT>
+  vector<NumericT>
+  operator+=(vector_base<NumericT> & v1,
+             const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>,
+                                                              const vector_base<NumericT>,
+                                                              op_prod> & proxy)
+  {
+    assert(viennacl::traits::size2(proxy.lhs()) == v1.size() && bool("Size check failed in v1 += trans(A) * v2: size2(A) != size(v1)"));
+
+    vector<NumericT> result(viennacl::traits::size2(proxy.lhs()));
+    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
+    v1 += result;
+    return v1;
+  }
+
+  //v -= A^T * x
+  /** @brief Implementation of the operation v1 -= A * v2, where A is a matrix
+  *
+  * @param v1     The addend vector where the result is written to.
+  * @param proxy  An expression template proxy class.
+  */
+  template<typename NumericT>
+  vector<NumericT>
+  operator-=(vector_base<NumericT> & v1,
+             const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>,
+                                                              const vector_base<NumericT>,
+                                                              op_prod> & proxy)
+  {
+    assert(viennacl::traits::size2(proxy.lhs()) == v1.size() && bool("Size check failed in v1 += trans(A) * v2: size2(A) != size(v1)"));
+
+    vector<NumericT> result(viennacl::traits::size2(proxy.lhs()));
+    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
+    v1 -= result;
+    return v1;
+  }
+
+
+  //free functions:
+  /** @brief Implementation of the operation 'result = v1 + A * v2', where A is a matrix
+  *
+  * @param v1     The addend vector.
+  * @param proxy  An expression template proxy class.
+  */
+  template<typename NumericT>
+  vector<NumericT>
+  operator+(const vector_base<NumericT> & v1,
+            const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>,
+                                     const vector_base<NumericT>,
+                                     op_prod> & proxy)
+  {
+    assert(viennacl::traits::size2(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed in v1 + trans(A) * v2: size2(A) != size(v1)"));
+
+    vector<NumericT> result(viennacl::traits::size(v1));
+    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
+    result += v1;
+    return result;
+  }
+
+  /** @brief Implementation of the operation 'result = v1 - A * v2', where A is a matrix
+  *
+  * @param v1     The addend vector.
+  * @param proxy  An expression template proxy class.
+  */
+  template<typename NumericT>
+  vector<NumericT>
+  operator-(const vector_base<NumericT> & v1,
+            const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>,
+                                     const vector_base<NumericT>,
+                                     op_prod> & proxy)
+  {
+    assert(viennacl::traits::size2(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed in v1 - trans(A) * v2: size2(A) != size(v1)"));
+
+    vector<NumericT> result(viennacl::traits::size(v1));
+    viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
+    result = v1 - result;
+    return result;
+  }
+
+
+} //namespace viennacl
+
+
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/maxmin.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/maxmin.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/maxmin.hpp
new file mode 100644
index 0000000..9269598
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/maxmin.hpp
@@ -0,0 +1,152 @@
+#ifndef VIENNACL_LINALG_MAXMIN_HPP_
+#define VIENNACL_LINALG_MAXMIN_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file norm_inf.hpp
+    @brief Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations.
+*/
+
+#include <cmath>
+#include "viennacl/forwards.h"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/meta/enable_if.hpp"
+#include "viennacl/meta/tag_of.hpp"
+#include "viennacl/meta/result_of.hpp"
+
+namespace viennacl
+{
+  //
+  // generic norm_inf function
+  //   uses tag dispatch to identify which algorithm
+  //   should be called
+  //
+  namespace linalg
+  {
+
+
+    // ----------------------------------------------------
+    // STL
+    //
+    template< typename NumericT >
+    NumericT max(std::vector<NumericT> const & v1)
+    {
+      //std::cout << "stl .. " << std::endl;
+      NumericT result = v1[0];
+      for (vcl_size_t i=1; i<v1.size(); ++i)
+      {
+        if (v1[i] > result)
+          result = v1[i];
+      }
+
+      return result;
+    }
+
+    // ----------------------------------------------------
+    // VIENNACL
+    //
+    template< typename ScalarType>
+    viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
+                                 const viennacl::vector_base<ScalarType>,
+                                 viennacl::op_max >
+    max(viennacl::vector_base<ScalarType> const & v1)
+    {
+       //std::cout << "viennacl .. " << std::endl;
+      return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
+                                          const viennacl::vector_base<ScalarType>,
+                                          viennacl::op_max >(v1, v1);
+    }
+
+    // with vector expression:
+    template<typename LHS, typename RHS, typename OP>
+    viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                viennacl::op_max>
+    max(viennacl::vector_expression<const LHS, const RHS, OP> const & vector)
+    {
+      return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                          const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                          viennacl::op_max >(vector, vector);
+    }
+
+    // ----------------------------------------------------
+    // STL
+    //
+    template< typename NumericT >
+    NumericT min(std::vector<NumericT> const & v1)
+    {
+      //std::cout << "stl .. " << std::endl;
+      NumericT result = v1[0];
+      for (vcl_size_t i=1; i<v1.size(); ++i)
+      {
+        if (v1[i] < result)
+          result = v1[i];
+      }
+
+      return result;
+    }
+
+    // ----------------------------------------------------
+    // VIENNACL
+    //
+    template< typename ScalarType>
+    viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
+                                 const viennacl::vector_base<ScalarType>,
+                                 viennacl::op_min >
+    min(viennacl::vector_base<ScalarType> const & v1)
+    {
+       //std::cout << "viennacl .. " << std::endl;
+      return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
+                                          const viennacl::vector_base<ScalarType>,
+                                          viennacl::op_min >(v1, v1);
+    }
+
+    template< typename ScalarType>
+    viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
+                                 const viennacl::vector_base<ScalarType>,
+                                 viennacl::op_min >
+    min(viennacl::vector<ScalarType> const & v1)
+    {
+       //std::cout << "viennacl .. " << std::endl;
+      return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
+                                          const viennacl::vector_base<ScalarType>,
+                                          viennacl::op_min >(v1, v1);
+    }
+
+    // with vector expression:
+    template<typename LHS, typename RHS, typename OP>
+    viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                viennacl::op_min>
+    min(viennacl::vector_expression<const LHS, const RHS, OP> const & vector)
+    {
+      return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                          const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                          viennacl::op_min >(vector, vector);
+    }
+
+
+
+  } // end namespace linalg
+} // end namespace viennacl
+#endif
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/misc_operations.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/misc_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/misc_operations.hpp
new file mode 100644
index 0000000..208573f
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/misc_operations.hpp
@@ -0,0 +1,94 @@
+#ifndef VIENNACL_LINALG_MISC_OPERATIONS_HPP_
+#define VIENNACL_LINALG_MISC_OPERATIONS_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/misc_operations.hpp
+    @brief Implementations of miscellaneous operations
+*/
+
+#include "viennacl/forwards.h"
+#include "viennacl/scalar.hpp"
+#include "viennacl/vector.hpp"
+#include "viennacl/matrix.hpp"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/linalg/host_based/misc_operations.hpp"
+
+#ifdef VIENNACL_WITH_OPENCL
+  #include "viennacl/linalg/opencl/misc_operations.hpp"
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+  #include "viennacl/linalg/cuda/misc_operations.hpp"
+#endif
+
+namespace viennacl
+{
+  namespace linalg
+  {
+
+    namespace detail
+    {
+
+      template<typename ScalarType>
+      void level_scheduling_substitute(vector<ScalarType> & vec,
+                                  viennacl::backend::mem_handle const & row_index_array,
+                                  viennacl::backend::mem_handle const & row_buffer,
+                                  viennacl::backend::mem_handle const & col_buffer,
+                                  viennacl::backend::mem_handle const & element_buffer,
+                                  vcl_size_t num_rows
+                                  )
+      {
+        assert( viennacl::traits::handle(vec).get_active_handle_id() == row_index_array.get_active_handle_id() && bool("Incompatible memory domains"));
+        assert( viennacl::traits::handle(vec).get_active_handle_id() ==      row_buffer.get_active_handle_id() && bool("Incompatible memory domains"));
+        assert( viennacl::traits::handle(vec).get_active_handle_id() ==      col_buffer.get_active_handle_id() && bool("Incompatible memory domains"));
+        assert( viennacl::traits::handle(vec).get_active_handle_id() ==  element_buffer.get_active_handle_id() && bool("Incompatible memory domains"));
+
+        switch (viennacl::traits::handle(vec).get_active_handle_id())
+        {
+          case viennacl::MAIN_MEMORY:
+            viennacl::linalg::host_based::detail::level_scheduling_substitute(vec, row_index_array, row_buffer, col_buffer, element_buffer, num_rows);
+            break;
+#ifdef VIENNACL_WITH_OPENCL
+          case viennacl::OPENCL_MEMORY:
+            viennacl::linalg::opencl::detail::level_scheduling_substitute(vec, row_index_array, row_buffer, col_buffer, element_buffer, num_rows);
+            break;
+#endif
+#ifdef VIENNACL_WITH_CUDA
+          case viennacl::CUDA_MEMORY:
+            viennacl::linalg::cuda::detail::level_scheduling_substitute(vec, row_index_array, row_buffer, col_buffer, element_buffer, num_rows);
+            break;
+#endif
+          case viennacl::MEMORY_NOT_INITIALIZED:
+            throw memory_exception("not initialised!");
+          default:
+            throw memory_exception("not implemented");
+        }
+      }
+
+
+
+
+    } //namespace detail
+
+
+  } //namespace linalg
+} //namespace viennacl
+
+
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/mixed_precision_cg.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/mixed_precision_cg.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/mixed_precision_cg.hpp
new file mode 100644
index 0000000..78254b3
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/mixed_precision_cg.hpp
@@ -0,0 +1,199 @@
+#ifndef VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
+#define VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/mixed_precision_cg.hpp
+    @brief The conjugate gradient method using mixed precision is implemented here. Experimental.
+*/
+
+#include <vector>
+#include <map>
+#include <cmath>
+#include "viennacl/forwards.h"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/linalg/ilu.hpp"
+#include "viennacl/linalg/prod.hpp"
+#include "viennacl/linalg/inner_prod.hpp"
+#include "viennacl/traits/clear.hpp"
+#include "viennacl/traits/size.hpp"
+#include "viennacl/meta/result_of.hpp"
+#include "viennacl/backend/memory.hpp"
+
+#include "viennacl/vector_proxy.hpp"
+
+namespace viennacl
+{
+  namespace linalg
+  {
+
+    /** @brief A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve() function
+    */
+    class mixed_precision_cg_tag
+    {
+      public:
+        /** @brief The constructor
+        *
+        * @param tol              Relative tolerance for the residual (solver quits if ||r|| < tol * ||r_initial||)
+        * @param max_iterations   The maximum number of iterations
+        * @param inner_tol        Inner tolerance for the low-precision iterations
+        */
+        mixed_precision_cg_tag(double tol = 1e-8, unsigned int max_iterations = 300, float inner_tol = 1e-2f) : tol_(tol), iterations_(max_iterations), inner_tol_(inner_tol) {}
+
+        /** @brief Returns the relative tolerance */
+        double tolerance() const { return tol_; }
+        /** @brief Returns the relative tolerance */
+        float inner_tolerance() const { return inner_tol_; }
+        /** @brief Returns the maximum number of iterations */
+        unsigned int max_iterations() const { return iterations_; }
+
+        /** @brief Return the number of solver iterations: */
+        unsigned int iters() const { return iters_taken_; }
+        void iters(unsigned int i) const { iters_taken_ = i; }
+
+        /** @brief Returns the estimated relative error at the end of the solver run */
+        double error() const { return last_error_; }
+        /** @brief Sets the estimated relative error at the end of the solver run */
+        void error(double e) const { last_error_ = e; }
+
+
+      private:
+        double tol_;
+        unsigned int iterations_;
+        float inner_tol_;
+
+        //return values from solver
+        mutable unsigned int iters_taken_;
+        mutable double last_error_;
+    };
+
+
+    /** @brief Implementation of the conjugate gradient solver without preconditioner
+    *
+    * Following the algorithm in the book by Y. Saad "Iterative Methods for sparse linear systems"
+    *
+    * @param matrix     The system matrix
+    * @param rhs        The load vector
+    * @param tag        Solver configuration tag
+    * @return The result vector
+    */
+    template<typename MatrixType, typename VectorType>
+    VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag)
+    {
+      //typedef typename VectorType::value_type      ScalarType;
+      typedef typename viennacl::result_of::cpu_value_type<VectorType>::type    CPU_ScalarType;
+
+      //std::cout << "Starting CG" << std::endl;
+      vcl_size_t problem_size = viennacl::traits::size(rhs);
+      VectorType result(rhs);
+      viennacl::traits::clear(result);
+
+      VectorType residual = rhs;
+
+      CPU_ScalarType ip_rr = viennacl::linalg::inner_prod(rhs, rhs);
+      CPU_ScalarType new_ip_rr = 0;
+      CPU_ScalarType norm_rhs_squared = ip_rr;
+
+      if (norm_rhs_squared <= 0) //solution is zero if RHS norm is zero
+        return result;
+
+      viennacl::vector<float> residual_low_precision(problem_size, viennacl::traits::context(rhs));
+      viennacl::vector<float> result_low_precision(problem_size, viennacl::traits::context(rhs));
+      viennacl::vector<float> p_low_precision(problem_size, viennacl::traits::context(rhs));
+      viennacl::vector<float> tmp_low_precision(problem_size, viennacl::traits::context(rhs));
+      float inner_ip_rr = static_cast<float>(ip_rr);
+      float new_inner_ip_rr = 0;
+      float initial_inner_rhs_norm_squared = static_cast<float>(ip_rr);
+      float alpha;
+      float beta;
+
+      // transfer rhs to single precision:
+      p_low_precision = rhs;
+      residual_low_precision = p_low_precision;
+
+      // transfer matrix to single precision:
+      viennacl::compressed_matrix<float> matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs));
+      viennacl::backend::memory_copy(matrix.handle1(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle1()), 0, 0, matrix_low_precision.handle1().raw_size() );
+      viennacl::backend::memory_copy(matrix.handle2(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle2()), 0, 0, matrix_low_precision.handle2().raw_size() );
+
+      viennacl::vector_base<CPU_ScalarType> matrix_elements_high_precision(const_cast<viennacl::backend::mem_handle &>(matrix.handle()), matrix.nnz(), 0, 1);
+      viennacl::vector_base<float>          matrix_elements_low_precision(matrix_low_precision.handle(), matrix.nnz(), 0, 1);
+      matrix_elements_low_precision = matrix_elements_high_precision;
+      matrix_low_precision.generate_row_block_information();
+
+      for (unsigned int i = 0; i < tag.max_iterations(); ++i)
+      {
+        tag.iters(i+1);
+
+        // lower precision 'inner iteration'
+        tmp_low_precision = viennacl::linalg::prod(matrix_low_precision, p_low_precision);
+
+        alpha = inner_ip_rr / viennacl::linalg::inner_prod(tmp_low_precision, p_low_precision);
+        result_low_precision += alpha * p_low_precision;
+        residual_low_precision -= alpha * tmp_low_precision;
+
+        new_inner_ip_rr = viennacl::linalg::inner_prod(residual_low_precision, residual_low_precision);
+
+        beta = new_inner_ip_rr / inner_ip_rr;
+        inner_ip_rr = new_inner_ip_rr;
+
+        p_low_precision = residual_low_precision + beta * p_low_precision;
+
+        //
+        // If enough progress has been achieved, update current residual with high precision evaluation
+        // This is effectively a restart of the CG method
+        //
+        if (new_inner_ip_rr < tag.inner_tolerance() * initial_inner_rhs_norm_squared || i == tag.max_iterations()-1)
+        {
+          residual = result_low_precision; // reusing residual vector as temporary buffer for conversion. Overwritten below anyway
+          result += residual;
+
+          // residual = b - Ax  (without introducing a temporary)
+          residual = viennacl::linalg::prod(matrix, result);
+          residual = rhs - residual;
+
+          new_ip_rr = viennacl::linalg::inner_prod(residual, residual);
+          if (new_ip_rr / norm_rhs_squared < tag.tolerance() *  tag.tolerance())//squared norms involved here
+            break;
+
+          p_low_precision = residual;
+
+          result_low_precision.clear();
+          residual_low_precision = p_low_precision;
+          initial_inner_rhs_norm_squared = static_cast<float>(new_ip_rr);
+          inner_ip_rr = static_cast<float>(new_ip_rr);
+        }
+      }
+
+      //store last error estimate:
+      tag.error(std::sqrt(new_ip_rr / norm_rhs_squared));
+
+      return result;
+    }
+
+    template<typename MatrixType, typename VectorType>
+    VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag, viennacl::linalg::no_precond)
+    {
+      return solve(matrix, rhs, tag);
+    }
+
+
+  }
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/nmf.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/nmf.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/nmf.hpp
new file mode 100644
index 0000000..c962c8e
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/nmf.hpp
@@ -0,0 +1,91 @@
+#ifndef VIENNACL_LINALG_NMF_HPP
+#define VIENNACL_LINALG_NMF_HPP
+
+/* =========================================================================
+ Copyright (c) 2010-2016, Institute for Microelectronics,
+ Institute for Analysis and Scientific Computing,
+ TU Wien.
+ Portions of this software are copyright by UChicago Argonne, LLC.
+
+ -----------------
+ ViennaCL - The Vienna Computing Library
+ -----------------
+
+ Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+ (A list of authors and contributors can be found in the manual)
+
+ License:         MIT (X11), see file LICENSE in the base directory
+ ============================================================================= */
+
+/** @file viennacl/linalg/nmf.hpp
+ @brief Provides a nonnegative matrix factorization implementation.  Experimental.
+
+
+ */
+
+#include "viennacl/vector.hpp"
+#include "viennacl/matrix.hpp"
+#include "viennacl/linalg/prod.hpp"
+#include "viennacl/linalg/norm_2.hpp"
+#include "viennacl/linalg/norm_frobenius.hpp"
+
+#include "viennacl/linalg/host_based/nmf_operations.hpp"
+
+#ifdef VIENNACL_WITH_OPENCL
+#include "viennacl/linalg/opencl/kernels/nmf.hpp"
+#include "viennacl/linalg/opencl/nmf_operations.hpp"
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+#include "viennacl/linalg/cuda/nmf_operations.hpp"
+#endif
+
+namespace viennacl
+{
+  namespace linalg
+  {
+
+    /** @brief The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized.
+     *
+     * @param V     Input matrix
+     * @param W     First factor
+     * @param H     Second factor
+     * @param conf  A configuration object holding tolerances and the like
+     */
+    template<typename ScalarType>
+    void nmf(viennacl::matrix_base<ScalarType> const & V, viennacl::matrix_base<ScalarType> & W,
+        viennacl::matrix_base<ScalarType> & H, viennacl::linalg::nmf_config const & conf)
+    {
+      assert(V.size1() == W.size1() && V.size2() == H.size2() && bool("Dimensions of W and H don't allow for V = W * H"));
+      assert(W.size2() == H.size1() && bool("Dimensions of W and H don't match, prod(W, H) impossible"));
+
+      switch (viennacl::traits::handle(V).get_active_handle_id())
+      {
+        case viennacl::MAIN_MEMORY:
+          viennacl::linalg::host_based::nmf(V, W, H, conf);
+          break;
+#ifdef VIENNACL_WITH_OPENCL
+          case viennacl::OPENCL_MEMORY:
+          viennacl::linalg::opencl::nmf(V,W,H,conf);
+          break;
+#endif
+
+#ifdef VIENNACL_WITH_CUDA
+          case viennacl::CUDA_MEMORY:
+          viennacl::linalg::cuda::nmf(V,W,H,conf);
+          break;
+#endif
+
+        case viennacl::MEMORY_NOT_INITIALIZED:
+          throw memory_exception("not initialised!");
+        default:
+          throw memory_exception("not implemented");
+
+      }
+
+    }
+  }
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/norm_1.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_1.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_1.hpp
new file mode 100644
index 0000000..e16238b
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_1.hpp
@@ -0,0 +1,104 @@
+#ifndef VIENNACL_LINALG_NORM_1_HPP_
+#define VIENNACL_LINALG_NORM_1_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file norm_1.hpp
+    @brief Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations.
+*/
+
+#include <cmath>
+#include "viennacl/forwards.h"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/meta/enable_if.hpp"
+#include "viennacl/meta/tag_of.hpp"
+
+namespace viennacl
+{
+  //
+  // generic norm_1 function
+  //   uses tag dispatch to identify which algorithm
+  //   should be called
+  //
+  namespace linalg
+  {
+
+    #ifdef VIENNACL_WITH_UBLAS
+    // ----------------------------------------------------
+    // UBLAS
+    //
+    template< typename VectorT >
+    typename viennacl::enable_if< viennacl::is_ublas< typename viennacl::traits::tag_of< VectorT >::type >::value,
+                                  typename VectorT::value_type
+                                >::type
+    norm_1(VectorT const& vector)
+    {
+      // std::cout << "ublas .. " << std::endl;
+      return boost::numeric::ublas::norm_1(vector);
+    }
+    #endif
+
+
+    // ----------------------------------------------------
+    // STL
+    //
+    template< typename T, typename A >
+    T norm_1(std::vector<T, A> const & v1)
+    {
+      //std::cout << "stl .. " << std::endl;
+      T result = 0;
+      for (typename std::vector<T, A>::size_type i=0; i<v1.size(); ++i)
+        result += std::fabs(v1[i]);
+
+      return result;
+    }
+
+    // ----------------------------------------------------
+    // VIENNACL
+    //
+    template< typename ScalarType>
+    viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
+                                 const viennacl::vector_base<ScalarType>,
+                                 viennacl::op_norm_1 >
+    norm_1(viennacl::vector_base<ScalarType> const & vector)
+    {
+      return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
+                                          const viennacl::vector_base<ScalarType>,
+                                          viennacl::op_norm_1 >(vector, vector);
+    }
+
+    // with vector expression:
+    template<typename LHS, typename RHS, typename OP>
+    viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                viennacl::op_norm_1>
+    norm_1(viennacl::vector_expression<const LHS, const RHS, OP> const & vector)
+    {
+      return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                          const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                          viennacl::op_norm_1 >(vector, vector);
+    }
+
+  } // end namespace linalg
+} // end namespace viennacl
+#endif
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/norm_2.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_2.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_2.hpp
new file mode 100644
index 0000000..babb285
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_2.hpp
@@ -0,0 +1,140 @@
+#ifndef VIENNACL_LINALG_NORM_2_HPP_
+#define VIENNACL_LINALG_NORM_2_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file norm_2.hpp
+    @brief Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations.
+*/
+
+#include <cmath>
+#include "viennacl/forwards.h"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/meta/enable_if.hpp"
+#include "viennacl/meta/tag_of.hpp"
+
+namespace viennacl
+{
+  //
+  // generic norm_2 function
+  //   uses tag dispatch to identify which algorithm
+  //   should be called
+  //
+  namespace linalg
+  {
+    #ifdef VIENNACL_WITH_MTL4
+    // ----------------------------------------------------
+    // MTL4
+    //
+    template< typename VectorT >
+    typename viennacl::enable_if< viennacl::is_mtl4< typename viennacl::traits::tag_of< VectorT >::type >::value,
+                                  typename VectorT::value_type>::type
+    norm_2(VectorT const & v)
+    {
+      return mtl::two_norm(v);
+    }
+    #endif
+
+    #ifdef VIENNACL_WITH_ARMADILLO
+    // ----------------------------------------------------
+    // Armadillo
+    //
+    template<typename NumericT>
+    NumericT norm_2(arma::Col<NumericT> const& v)
+    {
+      return norm(v);
+    }
+    #endif
+
+    #ifdef VIENNACL_WITH_EIGEN
+    // ----------------------------------------------------
+    // EIGEN
+    //
+    template< typename VectorT >
+    typename viennacl::enable_if< viennacl::is_eigen< typename viennacl::traits::tag_of< VectorT >::type >::value,
+                                  typename VectorT::RealScalar>::type
+    norm_2(VectorT const & v)
+    {
+      return v.norm();
+    }
+    #endif
+
+
+    #ifdef VIENNACL_WITH_UBLAS
+    // ----------------------------------------------------
+    // UBLAS
+    //
+    template< typename VectorT >
+    typename viennacl::enable_if< viennacl::is_ublas< typename viennacl::traits::tag_of< VectorT >::type >::value,
+                                  typename VectorT::value_type>::type
+    norm_2(VectorT const & v)
+    {
+      return boost::numeric::ublas::norm_2(v);
+    }
+    #endif
+
+
+    // ----------------------------------------------------
+    // STL
+    //
+    template< typename T, typename A >
+    T norm_2(std::vector<T, A> const & v1)
+    {
+      T result = 0;
+      for (typename std::vector<T, A>::size_type i=0; i<v1.size(); ++i)
+        result += v1[i] * v1[i];
+
+      return std::sqrt(result);
+    }
+
+    // ----------------------------------------------------
+    // VIENNACL
+    //
+    template< typename ScalarType>
+    viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
+                                 const viennacl::vector_base<ScalarType>,
+                                 viennacl::op_norm_2 >
+    norm_2(viennacl::vector_base<ScalarType> const & v)
+    {
+       //std::cout << "viennacl .. " << std::endl;
+      return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>,
+                                          const viennacl::vector_base<ScalarType>,
+                                          viennacl::op_norm_2 >(v, v);
+    }
+
+    // with vector expression:
+    template<typename LHS, typename RHS, typename OP>
+    viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                viennacl::op_norm_2>
+    norm_2(viennacl::vector_expression<const LHS, const RHS, OP> const & vector)
+    {
+      return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                          const viennacl::vector_expression<const LHS, const RHS, OP>,
+                                          viennacl::op_norm_2>(vector, vector);
+    }
+
+
+  } // end namespace linalg
+} // end namespace viennacl
+#endif
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/norm_frobenius.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_frobenius.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_frobenius.hpp
new file mode 100644
index 0000000..6873a53
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_frobenius.hpp
@@ -0,0 +1,73 @@
+#ifndef VIENNACL_LINALG_NORM_FROBENIUS_HPP_
+#define VIENNACL_LINALG_NORM_FROBENIUS_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= */
+
+/** @file viennacl/linalg/norm_frobenius.hpp
+    @brief Generic interface for the Frobenius norm.
+*/
+
+#include <cmath>
+#include "viennacl/forwards.h"
+#include "viennacl/tools/tools.hpp"
+#include "viennacl/meta/enable_if.hpp"
+#include "viennacl/meta/tag_of.hpp"
+
+namespace viennacl
+{
+  //
+  // generic norm_frobenius function
+  //   uses tag dispatch to identify which algorithm
+  //   should be called
+  //
+  namespace linalg
+  {
+
+    #ifdef VIENNACL_WITH_UBLAS
+    // ----------------------------------------------------
+    // UBLAS
+    //
+    template< typename VectorT >
+    typename viennacl::enable_if< viennacl::is_ublas< typename viennacl::traits::tag_of< VectorT >::type >::value,
+                                  typename VectorT::value_type
+                                >::type
+    norm_frobenius(VectorT const& v1)
+    {
+      return boost::numeric::ublas::norm_frobenius(v1);
+    }
+    #endif
+
+
+    // ----------------------------------------------------
+    // VIENNACL
+    //
+    template<typename NumericT>
+    scalar_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_norm_frobenius>
+    norm_frobenius(const matrix_base<NumericT> & A)
+    {
+      return scalar_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_norm_frobenius>(A, A);
+    }
+
+  } // end namespace linalg
+} // end namespace viennacl
+#endif
+
+
+
+
+


Mime
View raw message