singa-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wang...@apache.org
Subject [4/4] incubator-singa git commit: SINGA-200 - Implement Encoder and Decoder for data pre-processing
Date Thu, 23 Jun 2016 13:38:18 GMT
SINGA-200 - Implement Encoder and Decoder for data pre-processing

Improve the JPG2ProtoDecoder and Proto2JPGEncoder to consider the image
dimension order "HWC" or "CHW".
Change the input image Tensor's data type to kUChar, i.e., unsigned char.
Change the Tensor::data() API to `const SType* data<SType>() const;`

TODO add other encoding functions to accept other inputs, e.g., cv::Mat.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/833f4619
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/833f4619
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/833f4619

Branch: refs/heads/dev
Commit: 833f46195a7a15bc369fb785b638e5f90445ca43
Parents: 13d60b0
Author: Wei Wang <wangwei@comp.nus.edu.sg>
Authored: Thu Jun 23 21:33:49 2016 +0800
Committer: Wei Wang <wangwei@comp.nus.edu.sg>
Committed: Thu Jun 23 21:33:49 2016 +0800

----------------------------------------------------------------------
 CMakeLists.txt                       |   3 +-
 cmake/Templates/singa_config.h.in    |   2 +
 include/singa/core/tensor.h          |   6 +-
 include/singa/io/decoder.h           |  37 +++++----
 include/singa/io/encoder.h           |  46 ++++++-----
 src/core/tensor/tensor.cc            |   2 +
 src/io/jpg2proto_encoder.cc          |  95 ++++++++++++----------
 src/io/proto2jpg_decoder.cc          |  78 +++++++++++-------
 src/model/layer/prelu.cc             |   8 +-
 src/model/metric/accuracy.h          |   5 +-
 src/proto/core.proto                 |   3 +-
 src/proto/model.proto                |  15 ----
 test/singa/test_activation.cc        |   6 +-
 test/singa/test_adagrad.cc           |   8 +-
 test/singa/test_cross_entropy.cc     |   8 +-
 test/singa/test_cudnn_activation.cc  |   6 +-
 test/singa/test_cudnn_batchnorm.cc   |   8 +-
 test/singa/test_cudnn_convolution.cc |  16 ++--
 test/singa/test_cudnn_dropout.cc     |  10 +--
 test/singa/test_cudnn_lrn.cc         |   4 +-
 test/singa/test_cudnn_pooling.cc     |   4 +-
 test/singa/test_cudnn_softmax.cc     |  12 +--
 test/singa/test_decoder.cc           |  61 +++++++-------
 test/singa/test_dense.cc             |  16 ++--
 test/singa/test_dropout.cc           |  10 +--
 test/singa/test_flatten.cc           |   8 +-
 test/singa/test_initializer.cc       |  14 ++--
 test/singa/test_mse.cc               |   8 +-
 test/singa/test_nesterov.cc          |   8 +-
 test/singa/test_prelu.cc             |  12 +--
 test/singa/test_rmsprop.cc           |   8 +-
 test/singa/test_sgd.cc               |  16 ++--
 test/singa/test_softmax.cc           |   6 +-
 test/singa/test_tensor_math.cc       | 130 +++++++++++++++---------------
 34 files changed, 357 insertions(+), 322 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c61f8a4..87b3a5d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,11 +18,10 @@ SET(SINGA_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/include;${PROJECT_BINARY_DIR}")
 #message(STATUS "include path: ${SINGA_INCLUDE_DIR}")
 INCLUDE_DIRECTORIES(${SINGA_INCLUDE_DIR})
 
-#OPTION(CPU_ONLY "use GPU libs" OFF)
 OPTION(USE_CBLAS "Use CBlas libs" ON)
 OPTION(USE_CUDA "Use Cuda libs" ON)
 OPTION(USE_CUDNN "Use Cudnn libs" ON)
-OPTION(USE_OPENCV "Use opencv" ON)
+OPTION(USE_OPENCV "Use opencv" OFF)
 OPTION(USE_LMDB "Use LMDB libs" OFF)
 OPTION(USE_PYTHON "Generate py wrappers" OFF)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/cmake/Templates/singa_config.h.in
----------------------------------------------------------------------
diff --git a/cmake/Templates/singa_config.h.in b/cmake/Templates/singa_config.h.in
index 5e8b32d..d6d3eeb 100644
--- a/cmake/Templates/singa_config.h.in
+++ b/cmake/Templates/singa_config.h.in
@@ -7,6 +7,8 @@
 #cmakedefine CPU_ONLY
 
 #cmakedefine USE_CBLAS
+
+#cmakedefine USE_OPENCV
 // cuda
 #cmakedefine USE_CUDA
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index 5dfcab2..7e51f97 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -34,7 +34,7 @@ namespace singa {
 typedef vector<size_t> Shape;
 /// hardcode the width of types defined in DataType
 const size_t kDataWidth[] = {sizeof(float), sizeof(float) / 2, sizeof(int),
-                             sizeof(char), sizeof(double)};
+                             sizeof(char), sizeof(double), sizeof(unsigned char)};
 inline size_t SizeOf(DataType t) {
   static_assert(kNumDataType == sizeof(kDataWidth) / sizeof(size_t),
                 "Num of data types not match num of data width");
@@ -73,8 +73,8 @@ class Tensor {
 
   /// return immutable Tensor values with given type.
   template <typename SType>
-  SType data() const {
-    return static_cast<SType>(block()->data());
+  const SType* data() const {
+    return static_cast<const SType*>(block()->data());
   }
 
   /// data type, including kFloat16, kFloat32, kInt

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/include/singa/io/decoder.h
----------------------------------------------------------------------
diff --git a/include/singa/io/decoder.h b/include/singa/io/decoder.h
index 003dd59..dad2231 100644
--- a/include/singa/io/decoder.h
+++ b/include/singa/io/decoder.h
@@ -22,30 +22,35 @@
 #include <vector>
 #include <string>
 #include "singa/core/tensor.h"
-#include "singa/proto/model.pb.h"
+#include "singa/proto/io.pb.h"
 
 namespace singa {
-namespace io {
-
+/// The base decoder that converts a string into a set of tensors.
 class Decoder {
-  public:
-    Decoder() { }
-    virtual ~Decoder() { }
+ public:
+  Decoder() { }
+  virtual ~Decoder() { }
 
-    virtual void Setup(const DecoderConf& conf) = 0;
+  virtual void Setup(const DecoderConf& conf) {}
 
-    /**
-    * Decode value to get data and labels
-    */
-    virtual std::vector<Tensor> Decode(std::string value) = 0;
+  /// Decode value to get data and labels
+  virtual std::vector<Tensor> Decode(std::string value) = 0;
 };
 
+#ifdef USE_OPENCV
+/// Decode the string as an ImageRecord object and convert it into a image
+/// tensor (dtype is kFloat32) and a label tensor (dtype is kInt).
 class Proto2JPGDecoder : public Decoder {
-  public:
-    void Setup(const DecoderConf& conf) override;
-    std::vector<Tensor> Decode(std::string value) override;
+ public:
+  void Setup(const DecoderConf& conf) override {
+    image_dim_order_ = conf.image_dim_order();
+  }
+  std::vector<Tensor> Decode(std::string value) override;
+
+ private:
+  /// Indicate the dimension order for the output image tensor.
+  std::string image_dim_order_ = "CHW";
 };
-
-} // namespace io
+#endif
 } // namespace singa
 #endif // SINGA_IO_DECODER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/include/singa/io/encoder.h
----------------------------------------------------------------------
diff --git a/include/singa/io/encoder.h b/include/singa/io/encoder.h
index 5a8dbeb..5c1f242 100644
--- a/include/singa/io/encoder.h
+++ b/include/singa/io/encoder.h
@@ -22,32 +22,40 @@
 #include <vector>
 #include <string>
 #include "singa/core/tensor.h"
-#include "singa/proto/model.pb.h"
+#include "singa/proto/io.pb.h"
 
 namespace singa {
-namespace io {
 
+/// Base encoder class that convert a set of tensors into string for storage.
 class Encoder {
-  public:
-    Encoder() { }
-    virtual ~Encoder() { }
-    
-    virtual void Setup(const EncoderConf& conf) = 0;
-
-    /**
-     * Format each sample data as a string,
-     * whose structure depends on the proto definition.
-     * e.g., {key, shape, label, type, data, ...}
-     */
-    virtual std::string Encode(vector<Tensor>& data) = 0;
+ public:
+  Encoder() {}
+  virtual ~Encoder() {}
+
+  virtual void Setup(const EncoderConf& conf) {}
+
+  /// Format each sample data as a string,
+  /// whose structure depends on the proto definition.
+  virtual std::string Encode(vector<Tensor>& data) = 0;
 };
 
+#ifdef USE_OPENCV
+/// Convert an image and its label into an ImageRecord (protobuf message).
 class JPG2ProtoEncoder : public Encoder {
-  public:
-    void Setup(const EncoderConf& conf) override;
-    std::string Encode(vector<Tensor>& data) override;
-};
+ public:
+  void Setup(const EncoderConf& conf) override {
+    image_dim_order_ = conf.image_dim_order();
+  }
+  /// 'data' has two tesors, one for the image pixels (3D) and one for the
+  /// label. The image tensor's data type is kUChar.
+  /// The dimension order is indicated in the EncoderConf, i.e. image_dim_order.
+  /// The label tensor's data type is kInt.
+  std::string Encode(vector<Tensor>& data) override;
 
-} // namespace io
+ private:
+  /// Indicate the input image tensor's dimension order.
+  std::string image_dim_order_ = "HWC";
+};
+#endif  // USE_OPENCV
 } // namespace singa
 #endif  // SINGA_IO_ENCODER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index 8afc17c..3aa5c0a 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -133,6 +133,8 @@ void Tensor::CopyDataFromHostPtr(const DType *src, const size_t num) {
     LOG(WARNING) << "Copy data from null host ptr";
   }
 }
+template void Tensor::CopyDataFromHostPtr(const unsigned char *src,
+                                          const size_t num);
 template void Tensor::CopyDataFromHostPtr(const float *src, const size_t num);
 template void Tensor::CopyDataFromHostPtr(const int *src, const size_t num);
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/src/io/jpg2proto_encoder.cc
----------------------------------------------------------------------
diff --git a/src/io/jpg2proto_encoder.cc b/src/io/jpg2proto_encoder.cc
index 5b006e1..c5d81b8 100644
--- a/src/io/jpg2proto_encoder.cc
+++ b/src/io/jpg2proto_encoder.cc
@@ -17,54 +17,67 @@
  */
 
 #include "singa/io/encoder.h"
-#include "singa/proto/model.pb.h"
+
+#ifdef USE_OPENCV
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
 
 namespace singa {
 
-namespace io{
-  void JPG2ProtoEncoder::Setup(const EncoderConf& conf) { return; }
-
-  std::string JPG2ProtoEncoder::Encode(vector<Tensor>& data) {
-    // suppose data[0]: data, data[1]: label
-    // data[0] has a shape as {height, width, channel}
-    CHECK_EQ(data[0].nDim(), 3u);
-    int height = data[0].shape()[0];
-    int width = data[0].shape()[1];
-    int channel = data[0].shape()[2];
-    cv::Mat mat = cv::Mat(height, width, CV_8UC3, cv::Scalar(0, 0, 0));
-    CHECK_EQ(height, mat.rows);
-    CHECK_EQ(width, mat.cols);
-    CHECK_EQ(channel, mat.channels());
+std::string JPG2ProtoEncoder::Encode(vector<Tensor>& data) {
+  // suppose image: image, data[1]: label
+  CHECK_LE(data.size(), 2u);
+  const Tensor& image = data.at(0);
+  CHECK_EQ(image.nDim(), 3u);
+  CHECK_EQ(image.data_type(), kUChar) << "Data type " << image.data_type()
+    << " is invalid for an raw image";
+  const auto* raw = image.data<unsigned char>();
+  cv::Mat mat;
+  if (image_dim_order_ == "HWC") {
+    size_t height = image.shape(0), width = image.shape(1),
+           channel = image.shape(2);
+    mat = cv::Mat(height, width, CV_8UC3, cv::Scalar(0, 0, 0));
+    for (size_t i = 0; i < height; i++)
+      for (size_t j = 0; j < width; j++)
+        for (size_t k = 0; k < channel; k++)
+          mat.at<cv::Vec3b>(i, j)[k] =
+              raw[i * width * channel + j * channel + k];
+  } else if (image_dim_order_ == "CHW") {
+    size_t channel = image.shape(0), height = image.shape(1),
+           width = image.shape(2);
+    mat = cv::Mat(height, width, CV_8UC3, cv::Scalar(0, 0, 0));
+    for (size_t i = 0; i < height; i++)
+      for (size_t j = 0; j < width; j++)
+        for (size_t k = 0; k < channel; k++)
+          mat.at<cv::Vec3b>(i, j)[k] = raw[k * height * width + i * width + j];
+  } else {
+    LOG(FATAL) << "Unknow dimension order for images " << image_dim_order_
+               << " Only support 'HWC' and 'CHW'";
+  }
 
-    if (data[0].data_type() != kInt)
-      LOG(FATAL) << "Data type " << data[0].data_type() <<" is invalid for an raw image";
-    const int* raw = data[0].data<const int*>();
-    for (int i = 0; i < height; i++)
-      for (int j = 0; j < width; j++)
-        for (int k = 0; k < channel; k++)
-	  mat.at<cv::Vec3b>(i, j)[k] = static_cast<uchar>(raw[i * width * channel + j * channel + k]);
-    // suppose each image is attached with only one label
-    const int* label = data[1].data<const int*>();
-    CHECK_EQ(label[0], 2);
+  // encode image with jpg format
+  std::vector<uchar> buff;
+  std::vector<int> param = std::vector<int>(2);
+  param[0] = CV_IMWRITE_JPEG_QUALITY;
+  param[1] = 100;  // default is 95
+  cv::imencode(".jpg", mat, buff, param);
+  std::string buf(buff.begin(), buff.end());
 
-    // encode image with jpg format
-    std::vector<uchar> buff;
-    std::vector<int> param = std::vector<int>(2);
-    param[0] = CV_IMWRITE_JPEG_QUALITY;
-    param[1] = 100; // default is 95
-    cv::imencode(".jpg", mat, buff, param);
-    std::string buf(buff.begin(), buff.end());
+  std::string output;
+  ImageRecord record;
+  for (size_t i = 0; i < image.nDim(); i++)
+    record.add_shape(image.shape(i));
+  record.set_pixel(buf);
 
-    std::string output;
-    ImageRecordProto image;
-    image.set_label(label[0]);
-    for (size_t i = 0; i < data[0].nDim(); i++)
-      image.add_shape(data[0].shape()[i]);
-    image.set_pixel(buf);
-    image.SerializeToString(&output);
-    return output;
+  // suppose each image is attached with at most one label
+  if (data.size() == 2) {
+    const int* label = data[1].data<int>();
+    CHECK_EQ(label[0], 2);
+    record.add_label(label[0]);
   }
+
+  record.SerializeToString(&output);
+  return output;
 }
-}
+}  // namespace singa
+#endif  // USE_OPENCV

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/src/io/proto2jpg_decoder.cc
----------------------------------------------------------------------
diff --git a/src/io/proto2jpg_decoder.cc b/src/io/proto2jpg_decoder.cc
index fd8d9b3..0125ea3 100644
--- a/src/io/proto2jpg_decoder.cc
+++ b/src/io/proto2jpg_decoder.cc
@@ -17,41 +17,59 @@
  */
 
 #include "singa/io/decoder.h"
-#include "singa/proto/model.pb.h"
+
+#ifdef USE_OPENCV
+
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
 
 namespace singa {
 
-namespace io {
-  void Proto2JPGDecoder::Setup(const DecoderConf& conf) { return; }
-
-  std::vector<Tensor> Proto2JPGDecoder::Decode(std::string value) {
-    std::vector<Tensor> output;
-    ImageRecordProto image;
-    image.ParseFromString(value);
-    Shape shape(image.shape().begin(), image.shape().end());
-    Tensor raw(shape), label(Shape{1});
-    std::vector<uchar> pixel(image.pixel().begin(), image.pixel().end());
-
-    // decode image
-    cv::Mat mat = cv::imdecode(cv::Mat(pixel), CV_LOAD_IMAGE_COLOR);
-    int height = mat.rows, width = mat.cols, channel = mat.channels();
-    CHECK_EQ(shape[0], height);
-    CHECK_EQ(shape[1], width);
-    CHECK_EQ(shape[2], channel);
-
-    float* data = new float[raw.Size()];
-    for (int i = 0; i < height; i++)
-      for (int j = 0; j < width; j++)
-        for (int k = 0; k < channel; k++)
-          data[i * width * channel + j * channel + k] = static_cast<float>(static_cast<int>(mat.at<cv::Vec3b>(i, j)[k]));
-    raw.CopyDataFromHostPtr<float>(data, raw.Size());
-    float l = static_cast<float>(image.label());
-    label.CopyDataFromHostPtr(&l, 1);
-    output.push_back(raw);
+std::vector<Tensor> Proto2JPGDecoder::Decode(std::string value) {
+  std::vector<Tensor> output;
+
+  ImageRecord record;
+  record.ParseFromString(value);
+  std::vector<uchar> pixel(record.pixel().begin(), record.pixel().end());
+
+  // decode image
+  cv::Mat mat = cv::imdecode(cv::Mat(pixel), CV_LOAD_IMAGE_COLOR);
+  size_t height = mat.rows, width = mat.cols, channel = mat.channels();
+  Shape shape(record.shape().begin(), record.shape().end());
+  CHECK_EQ(shape[0], height);
+  CHECK_EQ(shape[1], width);
+  CHECK_EQ(shape[2], channel);
+  Tensor image(shape);
+
+  float* data = new float[image.Size()];
+  if (image_dim_order_ == "CHW") {
+    for (size_t i = 0; i < height; i++)
+      for (size_t j = 0; j < width; j++)
+        for (size_t k = 0; k < channel; k++)
+          data[k * height * width + i * width + j] = static_cast<float>(
+              static_cast<int>(mat.at<cv::Vec3b>(i, j)[k]));
+  } else if (image_dim_order_ == "HWC") {
+
+    for (size_t i = 0; i < height; i++)
+      for (size_t j = 0; j < width; j++)
+        for (size_t k = 0; k < channel; k++)
+          data[i * width * channel + j * channel + k] =
+              static_cast<float>(static_cast<int>(mat.at<cv::Vec3b>(i, j)[k]));
+  } else {
+    LOG(FATAL) << "Unknow dimension order for images " << image_dim_order_
+               << " Only support 'HWC' and 'CHW'";
+  }
+  image.CopyDataFromHostPtr<float>(data, image.Size());
+  output.push_back(image);
+  delete data;
+
+  if (record.label_size()) {
+    Tensor label(Shape{1}, &defaultDevice, kInt);
+    int labelid = record.label(0);
+    label.CopyDataFromHostPtr(&labelid, 1);
     output.push_back(label);
-    return output;
   }
+  return output;
 }
-}
+}  // namespace singa
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/src/model/layer/prelu.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/prelu.cc b/src/model/layer/prelu.cc
index 6c58dbb..83a56fa 100644
--- a/src/model/layer/prelu.cc
+++ b/src/model/layer/prelu.cc
@@ -63,7 +63,9 @@ const Tensor PReLU::Forward(int flag, const Tensor &input) {
     output = input * ((input > 0.f) + temp);
   } else {
     // share the first param of Tensor A along all channels
-    const float a = a_.data<const float *>()[0];
+    LOG(FATAL) << "Not implemented";
+  // TODO(wangwei) cannot access the data in this way. The data could be on GPU.
+    auto a = a_.data<float>()[0];
     output = input * ((input > 0.f) + (input <= 0.f) * a);
   }
   if (flag & kTrain) buf_.push(input);
@@ -122,7 +124,9 @@ const std::pair<Tensor, vector<Tensor> > PReLU::Backward(int flag,
     }
   } else {
     // share the first param of Tensor A along all channels
-    const float a = a_.data<const float *>()[0];
+    LOG(FATAL) << "Not Implemented";
+    // TODO(wangwei) cannot access the data in this way. The data could be on GPU.
+    auto a = a_.data<float>()[0];
     input_grad = grad * input * ((input > 0.f) + (input <= 0.f) * a);
     Tensor temp = grad * input * (input <= 0.f);
     float sum = Sum<float>(temp);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/src/model/metric/accuracy.h
----------------------------------------------------------------------
diff --git a/src/model/metric/accuracy.h b/src/model/metric/accuracy.h
index fb23634..69bd96b 100644
--- a/src/model/metric/accuracy.h
+++ b/src/model/metric/accuracy.h
@@ -49,7 +49,8 @@ Tensor Accuracy::Match(const Tensor& prediction, const vector<int>& target) {
   size_t nb_classes = prediction.Size() / batchsize;
   // each row of prediction is the prob distribution for one sample
   CHECK_EQ(prediction.shape().at(0), batchsize);
-  const float* prob = prediction.data<const float*>();
+  // TODO(wangwei) CloneToDevice(host);
+  const float* prob = prediction.data<float>();
   float* score = new float[batchsize];
   for (size_t b = 0; b < batchsize; b++) {
     vector<std::pair<float, int>> prob_class;
@@ -72,7 +73,7 @@ Tensor Accuracy::Match(const Tensor& prediction, const vector<int>& target) {
 Tensor Accuracy::Forward(const Tensor& prediction, const Tensor& target) {
   vector<int> target_vec;
   // TODO(wangwei) copy target to host.
-  const int* target_value = target.data<const int*>();
+  const int* target_value = target.data<int>();
   for (size_t i = 0; i < target.Size(); i++)
     target_vec.push_back(target_value[i]);
   return Match(prediction, target_vec);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/src/proto/core.proto
----------------------------------------------------------------------
diff --git a/src/proto/core.proto b/src/proto/core.proto
index 88d7f12..3031359 100644
--- a/src/proto/core.proto
+++ b/src/proto/core.proto
@@ -27,7 +27,8 @@ enum DataType {
   kInt = 2;
   kChar = 3;
   kDouble = 4;
-  kNumDataType = 5;
+  kUChar = 5;
+  kNumDataType = 6;
 }
 
 enum LangType {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/src/proto/model.proto
----------------------------------------------------------------------
diff --git a/src/proto/model.proto b/src/proto/model.proto
index 8666c30..c06deec 100644
--- a/src/proto/model.proto
+++ b/src/proto/model.proto
@@ -51,21 +51,6 @@ message BlobProto {
   optional int32 width = 4 [default = 0];
 }
 
-// rename RecordProto to ImageRecordProto
-message ImageRecordProto {
-  repeated int32 shape = 1;
-  optional int32 label = 2;
-  optional bytes pixel = 3;
-}
-
-message EncoderConf {
-  optional string type = 1 [default = 'jpg'];
-}
-
-message DecoderConf {
-  optional string type = 1 [default = 'jpg'];
-}
-
 message FillerConf {
   // The filler type, case insensitive
   optional string type = 1 [default = 'constant'];

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_activation.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_activation.cc b/test/singa/test_activation.cc
index 504d599..4b76d68 100644
--- a/test/singa/test_activation.cc
+++ b/test/singa/test_activation.cc
@@ -60,7 +60,7 @@ TEST(Activation, Forward) {
 
     singa::Tensor out = acti.Forward(singa::kTrain, in);
 
-    const float* yptr = out.data<const float*>();
+    const float* yptr = out.data<float>();
     EXPECT_EQ(n, out.Size());
 
     float* y = new float[n];
@@ -104,13 +104,13 @@ TEST(Activation, Backward) {
     acti.Setup(Shape{n}, conf);
 
     singa::Tensor out = acti.Forward(singa::kTrain, in);
-    const float* yptr = out.data<const float*>();
+    const float* yptr = out.data<float>();
 
     const float grad[] = {2.0f, -3.0f, 1.0f, 3.0f, -1.0f, -2.0};
     singa::Tensor out_diff(singa::Shape{n});
     out_diff.CopyDataFromHostPtr<float>(grad, n);
     const auto in_diff = acti.Backward(singa::kTrain, out_diff);
-    const float* xptr = in_diff.first.data<const float*>();
+    const float* xptr = in_diff.first.data<float>();
 
     float* dx = new float[n];
     if (acti.Mode() == "SIGMOID") {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_adagrad.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_adagrad.cc b/test/singa/test_adagrad.cc
index ef930d5..642e929 100644
--- a/test/singa/test_adagrad.cc
+++ b/test/singa/test_adagrad.cc
@@ -39,7 +39,7 @@ TEST(Adagrad, ApplyCPU) {
   adagrad.Apply(0, lr, "xx", grad, &value);
 
   singa::Tensor v1 = value.Clone();
-  const float* newv1 = v1.data<const float*>();
+  const float* newv1 = v1.data<float>();
   float history[4];
   for (int i = 0; i < 4; ++i) history[i] = g[i] * g[i];
   for (int i = 0; i < 4; ++i)
@@ -49,7 +49,7 @@ TEST(Adagrad, ApplyCPU) {
   grad.CopyDataFromHostPtr(g, 4);
   adagrad.Apply(1, lr, "xx", grad, &value);
   singa::Tensor v2 = value.Clone();
-  const float* newv2 = v2.data<const float*>();
+  const float* newv2 = v2.data<float>();
   for (int i = 0; i < 4; ++i) history[i] += g[i] * g[i];
 
   for (int i = 0; i < 4; ++i)
@@ -75,7 +75,7 @@ TEST(Adagrad, ApplyCUDA) {
 
   singa::Tensor v1 = value.Clone();
   v1.ToHost();
-  const float* newv1 = v1.data<const float*>();
+  const float* newv1 = v1.data<float>();
   float history[4];
   for (int i = 0; i < 4; ++i) history[i] = g[i] * g[i];
   for (int i = 0; i < 4; ++i)
@@ -86,7 +86,7 @@ TEST(Adagrad, ApplyCUDA) {
   adagrad.Apply(1, lr, "xx", grad, &value);
   singa::Tensor v2 = value.Clone();
   v2.ToHost();
-  const float* newv2 = v2.data<const float*>();
+  const float* newv2 = v2.data<float>();
   for (int i = 0; i < 4; ++i) history[i] += g[i] * g[i];
 
   for (int i = 0; i < 4; ++i)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_cross_entropy.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cross_entropy.cc b/test/singa/test_cross_entropy.cc
index 0eb36e5..ce60f7c 100644
--- a/test/singa/test_cross_entropy.cc
+++ b/test/singa/test_cross_entropy.cc
@@ -45,7 +45,7 @@ TEST_F(TestSoftmaxCrossEntropy, CppForward) {
 
   singa::SoftmaxCrossEntropy cross_entropy;
   const Tensor& loss = cross_entropy.Forward(p, t);
-  auto ldat = loss.data<const float*>();
+  auto ldat = loss.data<float>();
 
   const float result_test = -log(0.25);
   EXPECT_FLOAT_EQ(ldat[0], result_test);
@@ -61,7 +61,7 @@ TEST_F(TestSoftmaxCrossEntropy, CppBackward) {
   cross_entropy.Forward(p, t);
   const Tensor& grad = cross_entropy.Backward();
 
-  auto gdat = grad.data<const float*>();
+  auto gdat = grad.data<float>();
   EXPECT_FLOAT_EQ(gdat[0], -0.75);
   EXPECT_FLOAT_EQ(gdat[1], 0.25);
   EXPECT_FLOAT_EQ(gdat[2], 0.25);
@@ -84,7 +84,7 @@ TEST_F(TestSoftmaxCrossEntropy, CudaForward) {
 
   Tensor loss = cross_entropy.Forward(p, t);
   loss.ToHost();
-  auto ldat = loss.data<const float*>();
+  auto ldat = loss.data<float>();
 
   const float result_test = -log(0.25);
   EXPECT_FLOAT_EQ(ldat[0], result_test);
@@ -103,7 +103,7 @@ TEST_F(TestSoftmaxCrossEntropy, CudaBackward) {
   Tensor grad = cross_entropy.Backward();
 
   grad.ToHost();
-  auto gdat = grad.data<const float*>();
+  auto gdat = grad.data<float>();
   EXPECT_FLOAT_EQ(gdat[0], -0.75);
   EXPECT_FLOAT_EQ(gdat[1], 0.25);
   EXPECT_FLOAT_EQ(gdat[2], 0.25);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_cudnn_activation.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_activation.cc b/test/singa/test_cudnn_activation.cc
index da8ec62..940c6b9 100644
--- a/test/singa/test_cudnn_activation.cc
+++ b/test/singa/test_cudnn_activation.cc
@@ -67,7 +67,7 @@ TEST(TCudnnActivation, Forward) {
     EXPECT_EQ(n, out.Size());
     singa::CppCPU host(0, 1);
     out.ToDevice(&host);
-    const float* yptr = out.data<const float*>();
+    const float* yptr = out.data<float>();
     float* y = new float[n];
     if (acti.Mode() == "SIGMOID") {
       for (size_t i = 0; i < n; i++) y[i] = 1.f / (1.f + exp(-x[i]));
@@ -105,7 +105,7 @@ TEST(TCudnnActivation, Backward) {
     EXPECT_EQ(n, out.Size());
     singa::CppCPU host(0, 1);
     out.ToDevice(&host);
-    const float* yptr = out.data<const float*>();
+    const float* yptr = out.data<float>();
 
     const float grad[] = {2.0f, 1.0f, 2.0f, 0.0f, -2.0f,
                           -1.0, 1.5,  2.5,  -1.5, -2.5};
@@ -114,7 +114,7 @@ TEST(TCudnnActivation, Backward) {
     const auto ret = acti.Backward(singa::kTrain, out_diff);
     singa::Tensor in_diff = ret.first;
     in_diff.ToDevice(&host);
-    const float* xptr = in_diff.data<const float*>();
+    const float* xptr = in_diff.data<float>();
     float* dx = new float[n];
     if (acti.Mode() == "SIGMOID") {
       for (size_t i = 0; i < n; i++) dx[i] = grad[i] * yptr[i] * (1. - yptr[i]);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_cudnn_batchnorm.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_batchnorm.cc b/test/singa/test_cudnn_batchnorm.cc
index ba090cb..b3b6477 100644
--- a/test/singa/test_cudnn_batchnorm.cc
+++ b/test/singa/test_cudnn_batchnorm.cc
@@ -77,7 +77,7 @@ TEST(CudnnBatchNorm, Forward) {
   singa::Tensor out = batchnorm.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
   out.ToHost();
-  const float *outptr = out.data<const float *>();
+  const float *outptr = out.data<float>();
   const auto & shape = out.shape();
   EXPECT_EQ(4u, shape.size());
   EXPECT_EQ(1u, shape[0]);
@@ -178,7 +178,7 @@ TEST(CudnnBatchNorm, Backward) {
   singa::CppCPU host(0, 1);
   singa::Tensor dx = ret.first;
   dx.ToDevice(&host);
-  const float *dxptr = dx.data<const float *>();
+  const float *dxptr = dx.data<float>();
   const auto & shape = dx.shape();
   EXPECT_EQ(4u, shape.size());
   EXPECT_EQ(1u, shape[0]);
@@ -220,7 +220,7 @@ TEST(CudnnBatchNorm, Backward) {
 
   singa::Tensor dbnScale = ret.second.at(0);
   dbnScale.ToDevice(&host);
-  const float *dbnScaleptr = dbnScale.data<const float *>();
+  const float *dbnScaleptr = dbnScale.data<float>();
   const auto & dbnScaleShape = dbnScale.shape();
   EXPECT_EQ(4u, dbnScaleShape.size());
   EXPECT_EQ(1u, dbnScaleShape[0]);
@@ -233,7 +233,7 @@ TEST(CudnnBatchNorm, Backward) {
 
   singa::Tensor dbnBias = ret.second.at(1);
   dbnBias.ToDevice(&host);
-  const float *dbnBiasptr = dbnBias.data<const float *>();
+  const float *dbnBiasptr = dbnBias.data<float>();
   const auto & dbnBiasShape = dbnBias.shape();
   EXPECT_EQ(4u, dbnBiasShape.size());
   EXPECT_EQ(1u, dbnBiasShape[0]);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_cudnn_convolution.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_convolution.cc b/test/singa/test_cudnn_convolution.cc
index 3aa70dd..44077b7 100644
--- a/test/singa/test_cudnn_convolution.cc
+++ b/test/singa/test_cudnn_convolution.cc
@@ -99,7 +99,7 @@ TEST(CudnnConvolution, Forward) {
   singa::Tensor out1 = conv.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
   out1.ToDevice(&host);
-  const float *outptr1 = out1.data<const float *>();
+  const float *outptr1 = out1.data<float>();
   // Input: 3*3; kernel: 3*3; stride: 2*2; padding: 1*1.
   EXPECT_EQ(4u, out1.Size());
 
@@ -161,7 +161,7 @@ TEST(CudnnConvolution, Backward) {
   singa::CppCPU host(0, 1);
   singa::Tensor in_grad = ret.first;
   in_grad.ToDevice(&host);
-  const float *dx = in_grad.data<const float *>();
+  const float *dx = in_grad.data<float>();
   const float *wptr = we;
   EXPECT_EQ(9u, in_grad.Size());
   EXPECT_EQ(dy[0] * wptr[4], dx[0]);
@@ -180,10 +180,10 @@ TEST(CudnnConvolution, Backward) {
   singa::Tensor db = ret.second[1];
   dw.ToDevice(&host);
   db.ToDevice(&host);
-  const float *dbptr = db.data<const float *>();
+  const float *dbptr = db.data<float>();
   EXPECT_EQ(dy[0] + dy[1] + dy[2] + dy[3], dbptr[0]);
 
-  const float *dwptr = dw.data<const float *>();
+  const float *dwptr = dw.data<float>();
   EXPECT_EQ(9u, dw.Size());
   EXPECT_EQ(dy[3] * x[4], dwptr[0]);
   EXPECT_EQ(dy[3] * x[5] + dy[2] * x[3], dwptr[1]);
@@ -271,7 +271,7 @@ TEST(CudnnConvolution_AT, Forward) {
   singa::Tensor out1 = conv.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
   out1.ToDevice(&host);
-  const float *outptr1 = out1.data<const float *>();
+  const float *outptr1 = out1.data<float>();
   // Input: 3*3; kernel: 3*3; stride: 2*2; padding: 1*1.
   EXPECT_EQ(4u, out1.Size());
 
@@ -333,7 +333,7 @@ TEST(CudnnConvolution_AT, Backward) {
   singa::CppCPU host(0, 1);
   singa::Tensor in_grad = ret.first;
   in_grad.ToDevice(&host);
-  const float *dx = in_grad.data<const float *>();
+  const float *dx = in_grad.data<float>();
   const float *wptr = we;
   EXPECT_EQ(9u, in_grad.Size());
   EXPECT_EQ(dy[0] * wptr[4], dx[0]);
@@ -352,10 +352,10 @@ TEST(CudnnConvolution_AT, Backward) {
   singa::Tensor db = ret.second[1];
   dw.ToDevice(&host);
   db.ToDevice(&host);
-  const float *dbptr = db.data<const float *>();
+  const float *dbptr = db.data<float>();
   EXPECT_EQ(dy[0] + dy[1] + dy[2] + dy[3], dbptr[0]);
 
-  const float *dwptr = dw.data<const float *>();
+  const float *dwptr = dw.data<float>();
   EXPECT_EQ(9u, dw.Size());
   EXPECT_EQ(dy[3] * x[4], dwptr[0]);
   EXPECT_EQ(dy[3] * x[5] + dy[2] * x[3], dwptr[1]);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_cudnn_dropout.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_dropout.cc b/test/singa/test_cudnn_dropout.cc
index b8ce068..419dd0c 100644
--- a/test/singa/test_cudnn_dropout.cc
+++ b/test/singa/test_cudnn_dropout.cc
@@ -64,13 +64,13 @@ TEST(CudnnDropout, Forward) {
 
   singa::Tensor mask(drop.mask().shape(), drop.mask().data_type());
   mask.CopyData(drop.mask());
-  const char* mptr = mask.data<const char*>();
+  const char* mptr = mask.data<char>();
   for (size_t i = 0; i < n; i++)
     EXPECT_FLOAT_EQ(0, GetBitValue(mptr, i) * (GetBitValue(mptr, i) - 1));
 
   singa::CppCPU host(0, 1);
   out1.ToDevice(&host);
-  const float* outptr1 = out1.data<const float*>();
+  const float* outptr1 = out1.data<float>();
   EXPECT_EQ(n, out1.Size());
   float scale = 1.0f / (1.0f - pdrop);
   // the output value should be 0 or the same as the input
@@ -81,7 +81,7 @@ TEST(CudnnDropout, Forward) {
   singa::Tensor out2 = drop.Forward(singa::kEval, in);
   out2.ToDevice(&host);
   EXPECT_EQ(n, out2.Size());
-  const float* outptr2 = out2.data<const float*>();
+  const float* outptr2 = out2.data<float>();
   // the output value should be the same as the input
   EXPECT_EQ(x[0], outptr2[0]);
   EXPECT_EQ(x[1], outptr2[1]);
@@ -113,11 +113,11 @@ TEST(CudnnDropout, Backward) {
   singa::CppCPU host(0, 1);
   singa::Tensor in_grad = ret.first;
   in_grad.ToDevice(&host);
-  const float* dx = in_grad.data<const float*>();
+  const float* dx = in_grad.data<float>();
 
   singa::Tensor mask(drop.mask().shape(), drop.mask().data_type());
   mask.CopyData(drop.mask());
-  const char* mptr = mask.data<const char*>();
+  const char* mptr = mask.data<char>();
 
 
   EXPECT_FLOAT_EQ(dx[0], dy[0] * GetBitValue(mptr, 0) * scale);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_cudnn_lrn.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_lrn.cc b/test/singa/test_cudnn_lrn.cc
index 8576943..f7ec046 100644
--- a/test/singa/test_cudnn_lrn.cc
+++ b/test/singa/test_cudnn_lrn.cc
@@ -73,7 +73,7 @@ TEST(CudnnLRN, Forward) {
   singa::Tensor out = lrn.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
   out.ToDevice(&host);
-  const float *outptr = out.data<const float *>();
+  const float *outptr = out.data<float>();
   const auto & shape = out.shape();
   EXPECT_EQ(4u, shape.size());
   EXPECT_EQ(1u, shape[0]);
@@ -159,7 +159,7 @@ TEST(CudnnLRN, Backward) {
   singa::CppCPU host(0, 1);
   singa::Tensor dx = ret.first;
   dx.ToDevice(&host);
-  const float *dxptr = dx.data<const float *>();
+  const float *dxptr = dx.data<float>();
   const auto & shape = dx.shape();
   EXPECT_EQ(4u, shape.size());
   EXPECT_EQ(1u, shape[0]);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_cudnn_pooling.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_pooling.cc b/test/singa/test_cudnn_pooling.cc
index c7f9061..2a98ab4 100644
--- a/test/singa/test_cudnn_pooling.cc
+++ b/test/singa/test_cudnn_pooling.cc
@@ -76,7 +76,7 @@ TEST(CudnnPooling, Forward) {
   singa::Tensor out1 = pool.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
   out1.ToDevice(&host);
-  const float *outptr1 = out1.data<const float *>();
+  const float *outptr1 = out1.data<float>();
   // Input: 3*3; kernel: 2*2; stride: 1*1; no padding.
   EXPECT_EQ(4u, out1.Size());
   EXPECT_EQ(5.0f, outptr1[0]);
@@ -118,7 +118,7 @@ TEST(CudnnPooling, Backward) {
   singa::CppCPU host(0, 1);
   singa::Tensor in_grad = ret.first;
   in_grad.ToDevice(&host);
-  const float *dx = in_grad.data<const float *>();
+  const float *dx = in_grad.data<float>();
   EXPECT_EQ(9u, in_grad.Size());
   EXPECT_EQ(0.0f, dx[0]);
   EXPECT_EQ(0.0f, dx[1]);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_cudnn_softmax.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_softmax.cc b/test/singa/test_cudnn_softmax.cc
index 067491f..53ecb2b 100644
--- a/test/singa/test_cudnn_softmax.cc
+++ b/test/singa/test_cudnn_softmax.cc
@@ -56,7 +56,7 @@ TEST(CudnnSoftmax, Forward1D) {
   singa::Tensor out = sft.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
   out.ToDevice(&host);
-  const float* yptr = out.data<const float*>();
+  const float* yptr = out.data<float>();
   EXPECT_EQ(n, out.Size());
 
   float* y = new float[n];
@@ -83,7 +83,7 @@ TEST(CudnnSoftmax, Backward1D) {
   singa::Tensor out = sft.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
   out.ToDevice(&host);
-  const float* yptr = out.data<const float*>();
+  const float* yptr = out.data<float>();
 
   const float grad[] = {2.f, -3.f, 1.f, 3.f, -1.f, -2.f};
   singa::Tensor out_diff(shape, &cuda);
@@ -91,7 +91,7 @@ TEST(CudnnSoftmax, Backward1D) {
   const auto ret = sft.Backward(singa::kTrain, out_diff);
   singa::Tensor in_diff = ret.first;
   in_diff.ToDevice(&host);
-  const float* xptr = in_diff.data<const float*>();
+  const float* xptr = in_diff.data<float>();
 
   float* dx = new float[n];
   float sigma = 0.f;
@@ -118,7 +118,7 @@ TEST(CudnnSoftmax, Forward2D) {
   singa::Tensor out = sft.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
   out.ToDevice(&host);
-  const float* yptr = out.data<const float*>();
+  const float* yptr = out.data<float>();
   EXPECT_EQ(n, out.Size());
 
   float* y = new float[n];
@@ -147,7 +147,7 @@ TEST(CudnnSoftmax, Backward2D) {
   singa::Tensor out = sft.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
   out.ToDevice(&host);
-  const float* yptr = out.data<const float*>();
+  const float* yptr = out.data<float>();
 
   const float grad[] = {2.f, -3.f, 1.f, 3.f, -1.f, -2.f};
   singa::Tensor out_diff(shape, &cuda);
@@ -155,7 +155,7 @@ TEST(CudnnSoftmax, Backward2D) {
   const auto ret = sft.Backward(singa::kTrain, out_diff);
   singa::Tensor in_diff = ret.first;
   in_diff.ToDevice(&host);
-  const float* xptr = in_diff.data<const float*>();
+  const float* xptr = in_diff.data<float>();
 
   float* dx = new float[n];
   float* sigma = new float[batch];

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_decoder.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_decoder.cc b/test/singa/test_decoder.cc
index 33c4cd4..fa8683f 100644
--- a/test/singa/test_decoder.cc
+++ b/test/singa/test_decoder.cc
@@ -19,18 +19,19 @@
 *
 *************************************************************/
 
-#include "../include/singa/io/encoder.h"
-#include "../include/singa/io/decoder.h"
+#include "singa/io/encoder.h"
+#include "singa/io/decoder.h"
 #include "gtest/gtest.h"
 #include <time.h>
+
+#ifdef USE_OPENCV
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
-
-using namespace singa;
-
+using singa::Shape;
+using singa::Tensor;
 TEST(Decoder, Decode) {
-  io::JPG2ProtoEncoder encoder;
-  io::Proto2JPGDecoder decoder;
+  singa::JPG2ProtoEncoder encoder;
+  singa::Proto2JPGDecoder decoder;
 
   // initial random seed
   srand(time(NULL));
@@ -38,50 +39,46 @@ TEST(Decoder, Decode) {
   size_t height = 40, width = 30;
   size_t nheight = 256, nwidth = 256, channel = 3;
   size_t total = nheight * nwidth * channel;
-  int raw_label = 2;
-  cv::Mat image(height, width, CV_8UC3); 
+  cv::Mat image(height, width, CV_8UC3);
   for (size_t i = 0; i < height; i++)
     for (size_t j = 0; j < width; j++)
       for (size_t k = 0; k < channel; k++)
         image.at<cv::Vec3b>(i, j)[k] = static_cast<uchar>(rand() % 256);
-  
+
   cv::Mat transformed;
   cv::Size size(nwidth, nheight);
   cv::resize(image, transformed, size);
-  EXPECT_EQ(nwidth, transformed.cols);
-  EXPECT_EQ(nheight, transformed.rows);
-  EXPECT_EQ(channel, transformed.channels());
-  
+  EXPECT_EQ(static_cast<int>(nwidth), transformed.cols);
+  EXPECT_EQ(static_cast<int>(nheight), transformed.rows);
+  EXPECT_EQ(static_cast<int>(channel), transformed.channels());
+
   unsigned char* buff = transformed.data;
-  int* buf = new int[total];
-  for (size_t i = 0; i < total; i++)
-    buf[i] = static_cast<int>(buff[i]);
   Shape shape{nheight, nwidth, channel};
-  Tensor pixel(shape, kInt), label(Shape{1}, kInt);
-  pixel.CopyDataFromHostPtr<int>(buf, total);
+  Tensor pixel(shape, singa::kUChar), label(Shape{1}, singa::kInt);
+  pixel.CopyDataFromHostPtr<unsigned char>(buff, total);
+  int raw_label = 2;
   label.CopyDataFromHostPtr<int>(&raw_label, 1);
 
   std::vector<Tensor> input;
   input.push_back(pixel);
-  input.push_back(label); 
-  const int* in_pixel = input[0].data<const int *>();
-  for(size_t i = 0; i < total; i++) 
-    EXPECT_EQ(buf[i], in_pixel[i]);
-  const int* in_label = input[1].data<const int *>();
+  input.push_back(label);
+  const auto* in_pixel = input[0].data<unsigned char>();
+  for (size_t i = 0; i < total; i++) EXPECT_EQ(buff[i], in_pixel[i]);
+  const int* in_label = input[1].data<int>();
   EXPECT_EQ(2, in_label[0]);
-  EXPECT_EQ(2, input.size());
+  EXPECT_EQ(2u, input.size());
 
   std::string tmp = encoder.Encode(input);
   std::vector<Tensor> output = decoder.Decode(tmp);
-  EXPECT_EQ(2, output.size());
-  EXPECT_EQ(kFloat32, output[0].data_type());
+  EXPECT_EQ(2u, output.size());
+  EXPECT_EQ(singa::kFloat32, output[0].data_type());
   Shape out_shape = output[0].shape();
-  for (size_t i = 0; i < shape.size(); i++)
-    EXPECT_EQ(shape[i], out_shape[i]);
-  const float* out_label = output[1].data<const float*>();
+  for (size_t i = 0; i < shape.size(); i++) EXPECT_EQ(shape[i], out_shape[i]);
+  const int* out_label = output[1].data<int>();
   EXPECT_EQ(raw_label, out_label[0]);
   // opencv imencode will have some information loss
-  //const float* out_pixel = output[0].data<const float*>();
-  //for(size_t i = 0; i < total; i++) 
+  // const float* out_pixel = output[0].data<const float*>();
+  // for(size_t i = 0; i < total; i++)
   //  EXPECT_LE(fabs(in_pixel[i]-out_pixel[i]), 10.f);
 }
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_dense.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_dense.cc b/test/singa/test_dense.cc
index 6f5518e..a5fd960 100644
--- a/test/singa/test_dense.cc
+++ b/test/singa/test_dense.cc
@@ -66,7 +66,7 @@ TEST(Dense, ForwardCpp) {
 
   singa::Tensor out1 = dense.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
-  const float *outptr1 = out1.data<const float *>();
+  const float *outptr1 = out1.data<float>();
   EXPECT_EQ(9u, out1.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 3; j++)
@@ -116,7 +116,7 @@ TEST(Dense, BackwardCpp) {
   singa::Tensor in_grad = ret.first;
   singa::Tensor dweight = ret.second.at(0);
   singa::Tensor dbias = ret.second.at(1);
-  const float *dx = in_grad.data<const float *>();
+  const float *dx = in_grad.data<float>();
   EXPECT_EQ(6u, in_grad.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
@@ -124,7 +124,7 @@ TEST(Dense, BackwardCpp) {
           (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] +
            dy[i * 3 + 2] * we[2 * 2 + j]),
           dx[i * 2 + j]);
-  const float *dweightx = dweight.data<const float *>();
+  const float *dweightx = dweight.data<float>();
   EXPECT_EQ(6u, dweight.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
@@ -132,7 +132,7 @@ TEST(Dense, BackwardCpp) {
           (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] +
            dy[2 * 3 + i] * x[2 * 2 + j]),
           dweightx[i * 2 + j]);
-  const float *dbiasx = dbias.data<const float *>();
+  const float *dbiasx = dbias.data<float>();
   EXPECT_EQ(3u, dbias.Size());
   for (int i = 0; i < 3; i++)
     EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]);
@@ -170,7 +170,7 @@ TEST(Dense, ForwardCuda) {
   singa::Tensor out1 = dense.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
   out1.ToDevice(&host);
-  const float *outptr1 = out1.data<const float *>();
+  const float *outptr1 = out1.data<float>();
   EXPECT_EQ(9u, out1.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 3; j++)
@@ -219,7 +219,7 @@ TEST(Dense, BackwardCuda) {
   singa::Tensor dweight = ret.second.at(0);
   singa::Tensor dbias = ret.second.at(1);
   in_grad.ToDevice(&host);
-  const float *dx = in_grad.data<const float *>();
+  const float *dx = in_grad.data<float>();
   EXPECT_EQ(6u, in_grad.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
@@ -228,7 +228,7 @@ TEST(Dense, BackwardCuda) {
            dy[i * 3 + 2] * we[2 * 2 + j]),
           dx[i * 2 + j]);
   dweight.ToDevice(&host);
-  const float *dweightx = dweight.data<const float *>();
+  const float *dweightx = dweight.data<float>();
   EXPECT_EQ(6u, dweight.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
@@ -237,7 +237,7 @@ TEST(Dense, BackwardCuda) {
            dy[2 * 3 + i] * x[2 * 2 + j]),
           dweightx[i * 2 + j]);
   dbias.ToDevice(&host);
-  const float *dbiasx = dbias.data<const float *>();
+  const float *dbiasx = dbias.data<float>();
   EXPECT_EQ(3u, dbias.Size());
   for (int i = 0; i < 3; i++)
     EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_dropout.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_dropout.cc b/test/singa/test_dropout.cc
index b6ae9c6..3dd988a 100644
--- a/test/singa/test_dropout.cc
+++ b/test/singa/test_dropout.cc
@@ -52,11 +52,11 @@ TEST(Dropout, Forward) {
 
   singa::Tensor out1 = drop.Forward(singa::kTrain, in);
 
-  const float* mptr = drop.mask().data<const float*>();
+  const float* mptr = drop.mask().data<float>();
   for (size_t i = 0; i < n; i++)
     EXPECT_FLOAT_EQ(0, mptr[i] * (mptr[i] - scale));
 
-  const float* outptr1 = out1.data<const float*>();
+  const float* outptr1 = out1.data<float>();
   EXPECT_EQ(n, out1.Size());
   // the output value should be 0 or the same as the input
   EXPECT_EQ(0.f, outptr1[0] * (outptr1[0] - scale * x[0]));
@@ -65,7 +65,7 @@ TEST(Dropout, Forward) {
 
   singa::Tensor out2 = drop.Forward(singa::kEval, in);
   EXPECT_EQ(n, out2.Size());
-  const float* outptr2 = out2.data<const float*>();
+  const float* outptr2 = out2.data<float>();
   // the output value should be the same as the input
   EXPECT_EQ(x[0], outptr2[0]);
   EXPECT_EQ(x[1], outptr2[1]);
@@ -92,9 +92,9 @@ TEST(Dropout, Backward) {
   singa::Tensor grad(singa::Shape{n});
   grad.CopyDataFromHostPtr(dy, n);
 
-  const float* mptr = drop.mask().data<const float*>();
+  const float* mptr = drop.mask().data<float>();
   const auto ret = drop.Backward(singa::kTrain, grad);
-  const float* dx = ret.first.data<const float*>();
+  const float* dx = ret.first.data<float>();
   EXPECT_FLOAT_EQ(dx[0], dy[0] * (mptr[0] > 0 ? 1.0f : 0.0f) * scale);
   EXPECT_FLOAT_EQ(dx[1], dy[1] * (mptr[1] > 0) * scale);
   EXPECT_FLOAT_EQ(dx[7], dy[7] * (mptr[7] > 0) * scale);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_flatten.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_flatten.cc b/test/singa/test_flatten.cc
index f139a75..2a77272 100644
--- a/test/singa/test_flatten.cc
+++ b/test/singa/test_flatten.cc
@@ -55,7 +55,7 @@ TEST(Flatten, ForwardCPU) {
   EXPECT_EQ(n, out.Size());
   EXPECT_EQ(6u, out.shape(0));
   EXPECT_EQ(2u, out.shape(1));
-  const float *yptr = out.data<const float *>();
+  const float *yptr = out.data<float>();
   for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(x[i], yptr[i]);
 }
 
@@ -77,7 +77,7 @@ TEST(Flatten, BackwardCPU) {
 
   singa::Tensor temp = flt.Forward(singa::kTrain, in);
   const auto out = flt.Backward(singa::kTrain, temp);
-  const float *xptr = out.first.data<const float *>();
+  const float *xptr = out.first.data<float>();
   EXPECT_EQ(n, out.first.Size());
   EXPECT_EQ(2u, out.first.shape(0));
   EXPECT_EQ(1u, out.first.shape(1));
@@ -108,7 +108,7 @@ TEST(Flatten, ForwardGPU) {
   EXPECT_EQ(n, out.Size());
   EXPECT_EQ(6u, out.shape(0));
   EXPECT_EQ(2u, out.shape(1));
-  const float *yptr = out.data<const float *>();
+  const float *yptr = out.data<float>();
   for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(x[i], yptr[i]);
 }
 
@@ -134,7 +134,7 @@ TEST(Flatten, BackwardGPU) {
   singa::CppCPU host(0, 1);
   singa::Tensor in_diff = ret.first;
   in_diff.ToDevice(&host);
-  const float *xptr = in_diff.data<const float *>();
+  const float *xptr = in_diff.data<float>();
   EXPECT_EQ(n, in_diff.Size());
   EXPECT_EQ(2u, in_diff.shape(0));
   EXPECT_EQ(1u, in_diff.shape(1));

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_initializer.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_initializer.cc b/test/singa/test_initializer.cc
index a64d59e..e99cd79 100644
--- a/test/singa/test_initializer.cc
+++ b/test/singa/test_initializer.cc
@@ -27,7 +27,7 @@ TEST(Initializer, Constant) {
   conf.set_value(3.1f);
   x.Setup(conf);
   x.Fill(&t);
-  const float* xPtr = t.data<const float*>();
+  const float* xPtr = t.data<float>();
   for (size_t i = 0; i < n; i++)
     EXPECT_FLOAT_EQ(xPtr[i], 3.1f);
 }
@@ -42,7 +42,7 @@ TEST(Initializer, Gaussian) {
   conf.set_std(0.01f);
   x.Setup(conf);
   x.Fill(&t);
-  const float* xPtr = t.data<const float*>();
+  const float* xPtr = t.data<float>();
   float mean = 0.0f, std = 0.0f;
   for (size_t i = 0; i < n; i++)
     mean += xPtr[i];
@@ -66,7 +66,7 @@ TEST(Initializer, ConstantCUDA) {
   x.Setup(conf);
   x.Fill(&t);
   t.ToHost();
-  const float* xPtr = t.data<const float*>();
+  const float* xPtr = t.data<float>();
   for (size_t i = 0; i < n; i++)
     EXPECT_FLOAT_EQ(xPtr[i], 3.1f);
 
@@ -75,7 +75,7 @@ TEST(Initializer, ConstantCUDA) {
   singa::Tensor s(singa::Shape{n}, &dev);
   y.Fill(&s);
   s.ToHost();
-  const float* sPtr = s.data<const float*>();
+  const float* sPtr = s.data<float>();
   for (size_t i = 0; i < n; i++)
     EXPECT_FLOAT_EQ(sPtr[i], -0.1f);
 }
@@ -92,7 +92,7 @@ TEST(Initializer, GaussianCUDA) {
   x.Setup(conf);
   x.Fill(&t);
   t.ToHost();
-  const float* tPtr = t.data<const float*>();
+  const float* tPtr = t.data<float>();
   float mean = 0.0f, std = 0.0f;
   for (size_t i = 0; i < n; i++)
     mean += tPtr[i];
@@ -109,7 +109,7 @@ TEST(Initializer, GaussianCUDA) {
   singa::Tensor s(singa::Shape{n}, &dev);
   y.Fill(&s);
   s.ToHost();
-  const float* sPtr = s.data<const float*>();
+  const float* sPtr = s.data<float>();
   for (size_t i = 0; i < n; i++)
     mean += sPtr[i];
   mean /= n;
@@ -128,7 +128,7 @@ TEST(Initializer, XavierCUDA) {
   singa::Tensor t(singa::Shape{m, n}, &dev);
   x.Fill(&t);
   t.ToHost();
-  const float* xPtr = t.data<const float*>();
+  const float* xPtr = t.data<float>();
   float mean = 0.0f;
   float high = -100.0f, low = 100.0f;
   for (size_t i = 0; i < n; i++) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_mse.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_mse.cc b/test/singa/test_mse.cc
index 9ba062d..928be9d 100644
--- a/test/singa/test_mse.cc
+++ b/test/singa/test_mse.cc
@@ -44,7 +44,7 @@ class TestMSE : public ::testing::Test {
 TEST_F(TestMSE, CppForward) {
   singa::MSE mse;
   const Tensor& loss = mse.Forward(p, t);
-  auto ldat = loss.data<const float*>();
+  auto ldat = loss.data<float>();
 
   for (size_t i = 0, k = 0; i < loss.Size(); i++) {
     float l = 0.f;
@@ -61,7 +61,7 @@ TEST_F(TestMSE, CppBackward) {
   mse.Forward(p, t);
   const Tensor& grad = mse.Backward();
 
-  auto gdat = grad.data<const float*>();
+  auto gdat = grad.data<float>();
 
   for (size_t i = 0; i < grad.Size(); i++)
     EXPECT_FLOAT_EQ(gdat[i], (1.0f / p.shape().at(0)) * (pdat[i] - tdat[i]));
@@ -76,7 +76,7 @@ TEST_F(TestMSE, CudaForward) {
   Tensor loss = mse.Forward(p, t);
 
   loss.ToHost();
-  auto ldat = loss.data<const float*>();
+  auto ldat = loss.data<float>();
 
   for (size_t i = 0, k = 0; i < loss.Size(); i++) {
     float l = 0.f;
@@ -95,7 +95,7 @@ TEST_F(TestMSE, CudaBackward) {
   mse.Forward(p, t);
   Tensor grad = mse.Backward();
   grad.ToHost();
-  auto gdat = grad.data<const float*>();
+  auto gdat = grad.data<float>();
 
   for (size_t i = 0; i < grad.Size(); i++)
     EXPECT_FLOAT_EQ(gdat[i], (1.0f / p.shape().at(0)) * (pdat[i] - tdat[i]));

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_nesterov.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_nesterov.cc b/test/singa/test_nesterov.cc
index a9b264c..35b2b4d 100644
--- a/test/singa/test_nesterov.cc
+++ b/test/singa/test_nesterov.cc
@@ -38,7 +38,7 @@ TEST(Nesterov, ApplyCPU) {
   nesterov.Apply(0, lr, "xx", grad, &value);
 
   singa::Tensor v1 = value.Clone();
-  const float* newv1 = v1.data<const float*>();
+  const float* newv1 = v1.data<float>();
   float history[4], tmp[4];
   for (int i = 0; i < 4; ++i) {
     history[i] = g[i] * lr;
@@ -49,7 +49,7 @@ TEST(Nesterov, ApplyCPU) {
   grad.CopyDataFromHostPtr(g, 4);
   nesterov.Apply(1, lr, "xx", grad, &value);
   singa::Tensor v2 = value.Clone();
-  const float* newv2 = v2.data<const float*>();
+  const float* newv2 = v2.data<float>();
   for (int i = 0; i < 4; ++i) {
     tmp[i] = history[i];
     history[i] = history[i] * func(1) + g[i] * lr;
@@ -77,7 +77,7 @@ TEST(Nesterov, ApplyCUDA) {
 
   singa::Tensor v1 = value.Clone();
   v1.ToHost();
-  const float* newv1 = v1.data<const float*>();
+  const float* newv1 = v1.data<float>();
   float history[4], tmp[4];
   for (int i = 0; i < 4; ++i) {
     history[i] = g[i] * lr;
@@ -89,7 +89,7 @@ TEST(Nesterov, ApplyCUDA) {
   nesterov.Apply(1, lr, "xx", grad, &value);
   singa::Tensor v2 = value.Clone();
   v2.ToHost();
-  const float* newv2 = v2.data<const float*>();
+  const float* newv2 = v2.data<float>();
   for (int i = 0; i < 4; ++i) {
     tmp[i] = history[i];
     history[i] = history[i] * func(1) + g[i] * lr;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_prelu.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_prelu.cc b/test/singa/test_prelu.cc
index faff093..fee7c5b 100644
--- a/test/singa/test_prelu.cc
+++ b/test/singa/test_prelu.cc
@@ -60,7 +60,7 @@ TEST(PReLU, ForwardCPU) {
   prelu.Set_a(a);
 
   singa::Tensor out = prelu.Forward(singa::kTrain, in);
-  const float *yptr = out.data<const float *>();
+  const float *yptr = out.data<float>();
   EXPECT_EQ(n, out.Size());
 
   float *y = new float[n];
@@ -106,8 +106,8 @@ TEST(PReLU, BackwardCPU) {
   singa::Tensor out_diff(singa::Shape{batchsize, c, h, w});
   out_diff.CopyDataFromHostPtr<float>(grad, n);
   const auto ret = prelu.Backward(singa::kTrain, out_diff);
-  const float *xptr = ret.first.data<const float *>();
-  const float *aptr = ret.second.at(0).data<const float *>();
+  const float *xptr = ret.first.data<float>();
+  const float *aptr = ret.second.at(0).data<float>();
   float *dx = new float[n];
   size_t div_factor = prelu.Channel_shared() ? c : 1;
   size_t params = prelu.Channel_shared() ? 1 : c;
@@ -162,7 +162,7 @@ TEST(PReLU, ForwardGPU) {
   singa::Tensor out = prelu.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
   out.ToDevice(&host);
-  const float *yptr = out.data<const float *>();
+  const float *yptr = out.data<float>();
   EXPECT_EQ(n, out.Size());
 
   float *y = new float[n];
@@ -212,10 +212,10 @@ TEST(PReLU, BackwardGPU) {
   singa::Tensor in_diff = ret.first;
   singa::CppCPU host(0, 1);
   in_diff.ToDevice(&host);
-  const float *xptr = in_diff.data<const float *>();
+  const float *xptr = in_diff.data<float>();
   singa::Tensor a_diff = ret.second.at(0);
   a_diff.ToDevice(&host);
-  const float *aptr = a_diff.data<const float *>();
+  const float *aptr = a_diff.data<float>();
   float *dx = new float[n];
   size_t div_factor = prelu.Channel_shared() ? c : 1;
   size_t params = prelu.Channel_shared() ? 1 : c;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_rmsprop.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_rmsprop.cc b/test/singa/test_rmsprop.cc
index ddfdefe..004a9b6 100644
--- a/test/singa/test_rmsprop.cc
+++ b/test/singa/test_rmsprop.cc
@@ -43,7 +43,7 @@ TEST(RMSProp, ApplyCPU) {
   rmsprop.Apply(0, lr, "xx", grad, &value);
 
   singa::Tensor v1 = value.Clone();
-  const float* newv1 = v1.data<const float*>();
+  const float* newv1 = v1.data<float>();
   float history[4];
   for (int i = 0; i < 4; ++i) history[i] = g[i] * g[i] * (1 - rho);
   for (int i = 0; i < 4; ++i)
@@ -53,7 +53,7 @@ TEST(RMSProp, ApplyCPU) {
   grad.CopyDataFromHostPtr(g, 4);
   rmsprop.Apply(1, lr, "xx", grad, &value);
   singa::Tensor v2 = value.Clone();
-  const float* newv2 = v2.data<const float*>();
+  const float* newv2 = v2.data<float>();
   for (int i = 0; i < 4; ++i)
     history[i] = history[i] * rho + g[i] * g[i] * (1 - rho);
 
@@ -84,7 +84,7 @@ TEST(RMSProp, ApplyCUDA) {
 
   singa::Tensor v1 = value.Clone();
   v1.ToHost();
-  const float* newv1 = v1.data<const float*>();
+  const float* newv1 = v1.data<float>();
   float history[4];
   for (int i = 0; i < 4; ++i) history[i] = g[i] * g[i] * (1 - rho);
   for (int i = 0; i < 4; ++i)
@@ -95,7 +95,7 @@ TEST(RMSProp, ApplyCUDA) {
   rmsprop.Apply(1, lr, "xx", grad, &value);
   singa::Tensor v2 = value.Clone();
   v2.ToHost();
-  const float* newv2 = v2.data<const float*>();
+  const float* newv2 = v2.data<float>();
   for (int i = 0; i < 4; ++i)
     history[i] = history[i] * rho + g[i] * g[i] * (1 - rho);
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_sgd.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_sgd.cc b/test/singa/test_sgd.cc
index 5417b04..b9c6368 100644
--- a/test/singa/test_sgd.cc
+++ b/test/singa/test_sgd.cc
@@ -36,7 +36,7 @@ TEST(SGD, ApplyWithoutMomentum) {
   sgd.Apply(0, lr, "xx", grad, &value);
 
   singa::Tensor v1 = value.Clone();
-  const float* newv1 = v1.data<const float*>();
+  const float* newv1 = v1.data<float>();
   for (int i = 0; i < 4; i++) {
     EXPECT_FLOAT_EQ(newv1[i], v[i] - g[i] * lr);
   }
@@ -46,7 +46,7 @@ TEST(SGD, ApplyWithoutMomentum) {
   grad.CopyDataFromHostPtr(g, 4);
   sgd.Apply(1, lr, "xx", grad, &value);
   singa::Tensor v2 = value.Clone();
-  const float* newv2 = v2.data<const float*>();
+  const float* newv2 = v2.data<float>();
   for (int i = 0; i < 4; i++) {
     EXPECT_FLOAT_EQ(newv2[i], newv1[i] - g[i] * lr);
   }
@@ -68,7 +68,7 @@ TEST(SGD, ApplyWithMomentum) {
   sgd.Apply(0, lr, "xx", grad, &value);
 
   singa::Tensor v1 = value.Clone();
-  const float* newv1 = v1.data<const float*>();
+  const float* newv1 = v1.data<float>();
   for (int i = 0; i < 4; i++) {
     EXPECT_FLOAT_EQ(newv1[i], v[i] - g[i] * lr);
   }
@@ -76,7 +76,7 @@ TEST(SGD, ApplyWithMomentum) {
   grad.CopyDataFromHostPtr(g, 4);
   sgd.Apply(1, lr, "xx", grad, &value);
   singa::Tensor v2 = value.Clone();
-  const float* newv2 = v2.data<const float*>();
+  const float* newv2 = v2.data<float>();
   for (int i = 0; i < 4; i++) {
     EXPECT_FLOAT_EQ(newv2[i], newv1[i] - (g[i] * lr + g[i] * lr * func(1)));
   }
@@ -98,7 +98,7 @@ TEST(SGD, ApplyWithoutMomentumCuda) {
 
   singa::Tensor v1 = value.Clone();
   v1.ToHost();
-  const float* newv1 = v1.data<const float*>();
+  const float* newv1 = v1.data<float>();
   for (int i = 0; i < 4; i++) {
     EXPECT_FLOAT_EQ(newv1[i], v[i] - g[i] * lr);
   }
@@ -109,7 +109,7 @@ TEST(SGD, ApplyWithoutMomentumCuda) {
   sgd.Apply(1, lr, "xx", grad, &value);
   singa::Tensor v2 = value.Clone();
   v2.ToHost();
-  const float* newv2 = v2.data<const float*>();
+  const float* newv2 = v2.data<float>();
   for (int i = 0; i < 4; i++) {
     EXPECT_FLOAT_EQ(newv2[i], newv1[i] - g[i] * lr);
   }
@@ -133,7 +133,7 @@ TEST(SGD, ApplyWithMomentumCuda) {
 
   singa::Tensor v1 = value.Clone();
   v1.ToHost();
-  const float* newv1 = v1.data<const float*>();
+  const float* newv1 = v1.data<float>();
   for (int i = 0; i < 4; i++) {
     EXPECT_FLOAT_EQ(newv1[i], v[i] - g[i] * lr);
   }
@@ -142,7 +142,7 @@ TEST(SGD, ApplyWithMomentumCuda) {
   sgd.Apply(1, lr, "xx", grad, &value);
   singa::Tensor v2 = value.Clone();
   v2.ToHost();
-  const float* newv2 = v2.data<const float*>();
+  const float* newv2 = v2.data<float>();
   for (int i = 0; i < 4; i++) {
     EXPECT_FLOAT_EQ(newv2[i], newv1[i] - (g[i] * lr + g[i] * lr * func(1)));
   }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_softmax.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_softmax.cc b/test/singa/test_softmax.cc
index 2bf4505..f4a3bd7 100644
--- a/test/singa/test_softmax.cc
+++ b/test/singa/test_softmax.cc
@@ -47,7 +47,7 @@ TEST(Softmax, Forward) {
   sft.Setup(Shape{col}, conf);
 
   singa::Tensor out = sft.Forward(singa::kTrain, in);
-  const float* yptr = out.data<const float*>();
+  const float* yptr = out.data<float>();
   EXPECT_EQ(n, out.Size());
 
   float* sigma = new float[row];
@@ -74,13 +74,13 @@ TEST(Softmax, Backward) {
   singa::LayerConf conf;
   sft.Setup(Shape{col}, conf);
   singa::Tensor out = sft.Forward(singa::kTrain, in);
-  const float* yptr = out.data<const float*>();
+  const float* yptr = out.data<float>();
 
   const float grad[] = {2.0f, -3.0f, 1.0f, 3.0f, -1.0f, -2.0};
   singa::Tensor out_diff(singa::Shape{row, col});
   out_diff.CopyDataFromHostPtr<float>(grad, n);
   const auto in_diff = sft.Backward(singa::kTrain, out_diff);
-  const float* xptr = in_diff.first.data<const float*>();
+  const float* xptr = in_diff.first.data<float>();
 
   float* dx = new float[n];
   float* sigma = new float[row];

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/833f4619/test/singa/test_tensor_math.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_tensor_math.cc b/test/singa/test_tensor_math.cc
index 1092d69..7deb339 100644
--- a/test/singa/test_tensor_math.cc
+++ b/test/singa/test_tensor_math.cc
@@ -26,13 +26,13 @@ TEST_F(TestTensorMath, MemberAbs) {
   Tensor aa = a.Clone();
   Tensor bb = b.Clone();
   Tensor cc = aa - bb;
-  const float *dptr = cc.data<const float *>();
+  const float *dptr = cc.data<float>();
   EXPECT_NEAR(-0.1, dptr[0], 1e-5);
   EXPECT_NEAR(-0.1, dptr[1], 1e-5);
   EXPECT_NEAR(-0.1, dptr[2], 1e-5);
 
   Tensor p = Abs(cc);
-  const float *dptr1 = p.data<const float *>();
+  const float *dptr1 = p.data<float>();
   EXPECT_NEAR(0.1, dptr1[0], 1e-5);
   EXPECT_NEAR(0.1, dptr1[1], 1e-5);
   EXPECT_NEAR(0.1, dptr1[2], 1e-5);
@@ -40,7 +40,7 @@ TEST_F(TestTensorMath, MemberAbs) {
 
 TEST_F(TestTensorMath, MemberExp) {
   Tensor p = Exp(a);
-  const float *dptr1 = p.data<const float *>();
+  const float *dptr1 = p.data<float>();
   EXPECT_NEAR(exp(1.0f), dptr1[0], 1e-5);
   EXPECT_NEAR(exp(2.0f), dptr1[1], 1e-5);
   EXPECT_NEAR(exp(3.0f), dptr1[2], 1e-5);
@@ -48,7 +48,7 @@ TEST_F(TestTensorMath, MemberExp) {
 
 TEST_F(TestTensorMath, MemberLog) {
   Tensor p = Log(a);
-  const float *dptr1 = p.data<const float *>();
+  const float *dptr1 = p.data<float>();
   EXPECT_NEAR(log(1.0f), dptr1[0], 1e-5);
   EXPECT_NEAR(log(2.0f), dptr1[1], 1e-5);
   EXPECT_NEAR(log(3.0f), dptr1[2], 1e-5);
@@ -57,13 +57,13 @@ TEST_F(TestTensorMath, MemberLog) {
 TEST_F(TestTensorMath, MemberReLU) {
   Tensor aa = a.Clone();
   Tensor cc = aa - 2.0f;
-  const float *dptr = cc.data<const float *>();
+  const float *dptr = cc.data<float>();
   EXPECT_NEAR(-1.0f, dptr[0], 1e-5);
   EXPECT_NEAR(0.0f, dptr[1], 1e-5);
   EXPECT_NEAR(1.0f, dptr[2], 1e-5);
 
   Tensor p = ReLU(cc);
-  const float *dptr1 = p.data<const float *>();
+  const float *dptr1 = p.data<float>();
   EXPECT_NEAR(0.0f, dptr1[0], 1e-5);
   EXPECT_NEAR(0.0f, dptr1[1], 1e-5);
   EXPECT_NEAR(1.0f, dptr1[2], 1e-5);
@@ -71,7 +71,7 @@ TEST_F(TestTensorMath, MemberReLU) {
 
 TEST_F(TestTensorMath, MemberSigmoid) {
   Tensor p = Sigmoid(a);
-  const float *dptr1 = p.data<const float *>();
+  const float *dptr1 = p.data<float>();
   EXPECT_NEAR(1.0f / (1.0f + exp(-1.0f)), dptr1[0], 1e-5);
   EXPECT_NEAR(1.0f / (1.0f + exp(-2.0f)), dptr1[1], 1e-5);
   EXPECT_NEAR(1.0f / (1.0f + exp(-3.0f)), dptr1[2], 1e-5);
@@ -80,13 +80,13 @@ TEST_F(TestTensorMath, MemberSigmoid) {
 TEST_F(TestTensorMath, MemberSign) {
   Tensor aa = a.Clone();
   Tensor cc = aa - 2.0f;
-  const float *dptr = cc.data<const float *>();
+  const float *dptr = cc.data<float>();
   EXPECT_NEAR(-1.0f, dptr[0], 1e-5);
   EXPECT_NEAR(0.0f, dptr[1], 1e-5);
   EXPECT_NEAR(1.0f, dptr[2], 1e-5);
 
   Tensor p = Sign(cc);
-  const float *dptr1 = p.data<const float *>();
+  const float *dptr1 = p.data<float>();
   EXPECT_EQ(0.0f, dptr1[0]);
   EXPECT_EQ(0.0f, dptr1[1]);
   EXPECT_EQ(1.0f, dptr1[2]);
@@ -94,7 +94,7 @@ TEST_F(TestTensorMath, MemberSign) {
 
 TEST_F(TestTensorMath, MemberSqrt) {
   Tensor p = Sqrt(a);
-  const float *dptr1 = p.data<const float *>();
+  const float *dptr1 = p.data<float>();
   EXPECT_NEAR(sqrt(1.0), dptr1[0], 1e-5);
   EXPECT_NEAR(sqrt(2.0), dptr1[1], 1e-5);
   EXPECT_NEAR(sqrt(3.0), dptr1[2], 1e-5);
@@ -102,7 +102,7 @@ TEST_F(TestTensorMath, MemberSqrt) {
 
 TEST_F(TestTensorMath, MemberSquare) {
   Tensor p = Square(a);
-  const float *dptr1 = p.data<const float *>();
+  const float *dptr1 = p.data<float>();
   EXPECT_NEAR(1.0, dptr1[0], 1e-5);
   EXPECT_NEAR(4.0, dptr1[1], 1e-5);
   EXPECT_NEAR(9.0, dptr1[2], 1e-5);
@@ -110,7 +110,7 @@ TEST_F(TestTensorMath, MemberSquare) {
 
 TEST_F(TestTensorMath, MemberTanh) {
   Tensor p = Tanh(a);
-  const float *dptr1 = p.data<const float *>();
+  const float *dptr1 = p.data<float>();
   EXPECT_NEAR(tanh(1.0), dptr1[0], 1e-5);
   EXPECT_NEAR(tanh(2.0), dptr1[1], 1e-5);
   EXPECT_NEAR(tanh(3.0), dptr1[2], 1e-5);
@@ -118,13 +118,13 @@ TEST_F(TestTensorMath, MemberTanh) {
 
 TEST_F(TestTensorMath, Sum) {
   Tensor p1 = Sum(e, 0);
-  const float *dptr1 = p1.data<const float *>();
+  const float *dptr1 = p1.data<float>();
   EXPECT_FLOAT_EQ(9.0f, dptr1[0]);
   EXPECT_FLOAT_EQ(12.0f, dptr1[1]);
 
   Tensor p2(Shape{3, 1});
   p2 = Sum(e, 1);
-  const float *dptr2 = p2.data<const float *>();
+  const float *dptr2 = p2.data<float>();
   EXPECT_FLOAT_EQ(3.0f, dptr2[0]);
   EXPECT_FLOAT_EQ(7.0f, dptr2[1]);
   EXPECT_FLOAT_EQ(11.0f, dptr2[2]);
@@ -132,7 +132,7 @@ TEST_F(TestTensorMath, Sum) {
 
 TEST_F(TestTensorMath, SoftMax) {
   Tensor p1 = SoftMax(Reshape(e, Shape{1, 6}));
-  const float *dptr1 = p1.data<const float *>();
+  const float *dptr1 = p1.data<float>();
   float sum = 0;
   for (int i = 0; i < 6; i++) sum += exp(i + 1);
   EXPECT_NEAR(exp(1) / sum, dptr1[0], 1e-5);
@@ -143,14 +143,14 @@ TEST_F(TestTensorMath, SoftMax) {
   EXPECT_NEAR(exp(6) / sum, dptr1[5], 1e-5);
 
   Tensor p2 = SoftMax(e);
-  const float *dptr2 = p2.data<const float *>();
+  const float *dptr2 = p2.data<float>();
   EXPECT_NEAR(exp(1) / (exp(1) + exp(2)), dptr2[0], 1e-5);
   EXPECT_NEAR(exp(2) / (exp(1) + exp(2)), dptr2[1], 1e-5);
 }
 
 TEST_F(TestTensorMath, MemberLT) {
   Tensor p1 = a < 2.0f;
-  const float *dptr1 = p1.data<const float *>();
+  const float *dptr1 = p1.data<float>();
   EXPECT_FLOAT_EQ(1.0f, dptr1[0]);
   EXPECT_FLOAT_EQ(0.0f, dptr1[1]);
   EXPECT_FLOAT_EQ(0.0f, dptr1[2]);
@@ -158,7 +158,7 @@ TEST_F(TestTensorMath, MemberLT) {
 
 TEST_F(TestTensorMath, MemberLE) {
   Tensor p1 = a <= 2.0f;
-  const float *dptr1 = p1.data<const float *>();
+  const float *dptr1 = p1.data<float>();
   EXPECT_FLOAT_EQ(1.0f, dptr1[0]);
   EXPECT_FLOAT_EQ(1.0f, dptr1[1]);
   EXPECT_FLOAT_EQ(0.0f, dptr1[2]);
@@ -166,7 +166,7 @@ TEST_F(TestTensorMath, MemberLE) {
 
 TEST_F(TestTensorMath, MemberGT) {
   Tensor p1 = a > 2.0f;
-  const float *dptr1 = p1.data<const float *>();
+  const float *dptr1 = p1.data<float>();
   EXPECT_FLOAT_EQ(0.0f, dptr1[0]);
   EXPECT_FLOAT_EQ(0.0f, dptr1[1]);
   EXPECT_FLOAT_EQ(1.0f, dptr1[2]);
@@ -174,7 +174,7 @@ TEST_F(TestTensorMath, MemberGT) {
 
 TEST_F(TestTensorMath, MemberGE) {
   Tensor p1 = a >= 2.0f;
-  const float *dptr1 = p1.data<const float *>();
+  const float *dptr1 = p1.data<float>();
   EXPECT_FLOAT_EQ(0.0f, dptr1[0]);
   EXPECT_FLOAT_EQ(1.0f, dptr1[1]);
   EXPECT_FLOAT_EQ(1.0f, dptr1[2]);
@@ -182,7 +182,7 @@ TEST_F(TestTensorMath, MemberGE) {
 
 TEST_F(TestTensorMath, MemberPow) {
   Tensor p1 = Pow(b, 3.0f);
-  const float *dptr1 = p1.data<const float *>();
+  const float *dptr1 = p1.data<float>();
   EXPECT_FLOAT_EQ(pow(1.1f, 3.0f), dptr1[0]);
   EXPECT_FLOAT_EQ(pow(2.1f, 3.0f), dptr1[1]);
   EXPECT_FLOAT_EQ(pow(3.1f, 3.0f), dptr1[2]);
@@ -190,7 +190,7 @@ TEST_F(TestTensorMath, MemberPow) {
   // TODO(Yuchen): check pow(tensor a, tensor b) and add testcase after the
   // function is complete
   // Tensor p2 = Pow(a,b);
-  // const float *dptr2 = p2.data<const float *>();
+  // const float *dptr2 = p2.data<float>();
   // EXPECT_FLOAT_EQ(pow(1.0f,1.1f), dptr2[0]);
   // EXPECT_FLOAT_EQ(pow(2.0f,2.1f), dptr2[1]);
   // EXPECT_FLOAT_EQ(pow(3.0f,3.1f), dptr2[2]);
@@ -198,7 +198,7 @@ TEST_F(TestTensorMath, MemberPow) {
 
 TEST_F(TestTensorMath, MemberSub) {
   Tensor p1 = a - b;
-  const float *dptr1 = p1.data<const float *>();
+  const float *dptr1 = p1.data<float>();
   EXPECT_NEAR(-0.1, dptr1[0], 1e-5);
   EXPECT_NEAR(-0.1, dptr1[1], 1e-5);
   EXPECT_NEAR(-0.1, dptr1[2], 1e-5);
@@ -206,7 +206,7 @@ TEST_F(TestTensorMath, MemberSub) {
 
 TEST_F(TestTensorMath, MemberEltwiseMult) {
   Tensor p1 = a * b;
-  const float *dptr1 = p1.data<const float *>();
+  const float *dptr1 = p1.data<float>();
   EXPECT_NEAR(1.0 * 1.1, dptr1[0], 1e-5);
   EXPECT_NEAR(2.0 * 2.1, dptr1[1], 1e-5);
   EXPECT_NEAR(3.0 * 3.1, dptr1[2], 1e-5);
@@ -214,19 +214,19 @@ TEST_F(TestTensorMath, MemberEltwiseMult) {
 
 TEST_F(TestTensorMath, MemberDiv) {
   Tensor p1 = a / b;
-  const float *dptr1 = p1.data<const float *>();
+  const float *dptr1 = p1.data<float>();
   EXPECT_NEAR(1.0 / 1.1, dptr1[0], 1e-5);
   EXPECT_NEAR(2.0 / 2.1, dptr1[1], 1e-5);
   EXPECT_NEAR(3.0 / 3.1, dptr1[2], 1e-5);
 
   Tensor p2 = Div(10.0f, b);
-  const float *dptr2 = p2.data<const float *>();
+  const float *dptr2 = p2.data<float>();
   EXPECT_NEAR(10.0 / 1.1, dptr2[0], 1e-5);
   EXPECT_NEAR(10.0 / 2.1, dptr2[1], 1e-5);
   EXPECT_NEAR(10.0 / 3.1, dptr2[2], 1e-5);
 
   Tensor p3 = a / 8.0f;
-  const float *dptr3 = p3.data<const float *>();
+  const float *dptr3 = p3.data<float>();
   EXPECT_NEAR(1.0 / 8.0, dptr3[0], 1e-5);
   EXPECT_NEAR(2.0 / 8.0, dptr3[1], 1e-5);
   EXPECT_NEAR(3.0 / 8.0, dptr3[2], 1e-5);
@@ -235,7 +235,7 @@ TEST_F(TestTensorMath, MemberDiv) {
 TEST_F(TestTensorMath, MemberBernoulli) {
   Tensor p1(Shape{10000});
   Bernoulli(0.3f, &p1);
-  const float *dptr1 = p1.data<const float *>();
+  const float *dptr1 = p1.data<float>();
   float sum = 0;
   for (int i = 0; i < 10000; i++) sum += dptr1[i];
   float mean = sum / 10000;
@@ -250,7 +250,7 @@ TEST_F(TestTensorMath, MemberBernoulli) {
 TEST_F(TestTensorMath, MemberUniform) {
   Tensor p1(Shape{10000});
   Uniform(0.1f, 0.2f, &p1);
-  const float *dptr1 = p1.data<const float *>();
+  const float *dptr1 = p1.data<float>();
   float sum = 0;
   for (int i = 0; i < 10000; i++) sum += dptr1[i];
   float mean = sum / 10000;
@@ -265,7 +265,7 @@ TEST_F(TestTensorMath, MemberUniform) {
 TEST_F(TestTensorMath, MemberGaussian) {
   Tensor p1(Shape{50000});
   Gaussian(0.0f, 1.0f, &p1);
-  const float *dptr1 = p1.data<const float *>();
+  const float *dptr1 = p1.data<float>();
   float sum = 0;
   for (int i = 0; i < 50000; i++) sum += dptr1[i];
   float mean = sum / 50000;
@@ -280,7 +280,7 @@ TEST_F(TestTensorMath, MemberGaussian) {
 TEST_F(TestTensorMath, MemberAddTensor) {
   Tensor aa = a.Clone();
   aa += a;
-  const float *dptr = aa.data<const float *>();
+  const float *dptr = aa.data<float>();
   EXPECT_FLOAT_EQ(2.0f, dptr[0]);
   EXPECT_FLOAT_EQ(4.0f, dptr[1]);
   EXPECT_FLOAT_EQ(6.0f, dptr[2]);
@@ -288,13 +288,13 @@ TEST_F(TestTensorMath, MemberAddTensor) {
   // check p is initialized to 0
   Tensor p(Shape{6});
   p += aa;
-  const float *dptr1 = p.data<const float *>();
+  const float *dptr1 = p.data<float>();
   EXPECT_FLOAT_EQ(2.0f, dptr1[0]);
   EXPECT_FLOAT_EQ(4.0f, dptr1[1]);
   EXPECT_FLOAT_EQ(6.0f, dptr1[2]);
 
   a += b;
-  const float *dptr2 = a.data<const float *>();
+  const float *dptr2 = a.data<float>();
   EXPECT_FLOAT_EQ(2.1f, dptr2[0]);
   EXPECT_FLOAT_EQ(4.1f, dptr2[1]);
   EXPECT_FLOAT_EQ(6.1f, dptr2[2]);
@@ -304,21 +304,21 @@ TEST_F(TestTensorMath, MemberAddTensor) {
 TEST_F(TestTensorMath, AddTensors) {
   Tensor ret(a.shape(), a.device(), a.data_type());
   Add(a, b, &ret);
-  const float *dptr = ret.data<const float *>();
+  const float *dptr = ret.data<float>();
   EXPECT_FLOAT_EQ(2.1f, dptr[0]);
   EXPECT_FLOAT_EQ(4.1f, dptr[1]);
   EXPECT_FLOAT_EQ(6.1f, dptr[2]);
   EXPECT_FLOAT_EQ(12.1f, dptr[5]);
 
   const Tensor d = a + b;
-  const float *dptr2 = d.data<const float *>();
+  const float *dptr2 = d.data<float>();
   EXPECT_FLOAT_EQ(2.1f, dptr2[0]);
   EXPECT_FLOAT_EQ(4.1f, dptr2[1]);
   EXPECT_FLOAT_EQ(6.1f, dptr2[2]);
   EXPECT_FLOAT_EQ(12.1f, dptr2[5]);
 
   Add(a, b, &a);
-  const float *dptr1 = a.data<const float *>();
+  const float *dptr1 = a.data<float>();
   EXPECT_FLOAT_EQ(2.1f, dptr1[0]);
   EXPECT_FLOAT_EQ(4.1f, dptr1[1]);
   EXPECT_FLOAT_EQ(6.1f, dptr1[2]);
@@ -328,7 +328,7 @@ TEST_F(TestTensorMath, AddTensors) {
 TEST_F(TestTensorMath, SetValue) {
   Tensor t(Shape{4});
   t.SetValue(0.3f);
-  const float *ptr = t.data<const float *>();
+  const float *ptr = t.data<float>();
   for (int i = 0; i < 4; i++) EXPECT_FLOAT_EQ(ptr[i], 0.3f);
 }
 
@@ -336,7 +336,7 @@ TEST_F(TestTensorMath, Reshape) {
   Tensor t(Shape{4});
   t.SetValue(0.3f);
   Tensor p = Reshape(t, Shape{4, 1});
-  const float *ptr = t.data<const float *>();
+  const float *ptr = t.data<float>();
   EXPECT_EQ(p.shape(0), 4u);
   EXPECT_EQ(p.shape(1), 1u);
   for (int i = 0; i < 4; i++) EXPECT_FLOAT_EQ(ptr[i], 0.3f);
@@ -354,7 +354,7 @@ TEST_F(TestTensorMath, MultCpp) {
   t.CopyDataFromHostPtr(x, 4);
   d.CopyDataFromHostPtr(dat1, 6);
   Tensor C = Mult(d, t);
-  const float *xptr = C.data<const float *>();
+  const float *xptr = C.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       float tmp = 0;
@@ -367,10 +367,10 @@ TEST_F(TestTensorMath, MultCpp) {
   const float y[8] = {1.0f, 2.0f, 3.0f, 4.0f, 1.1f, 2.1f, 3.1f, 4.1f};
   Tensor s(Shape{4, 2});
   s.CopyDataFromHostPtr(y, 8);
-  const float *sPtr = s.data<const float *>();
+  const float *sPtr = s.data<float>();
   for (int i = 0; i < 8; i++) EXPECT_FLOAT_EQ(sPtr[i], y[i]);
   Tensor D = Mult(d, s.T());
-  const float *DPtr = D.data<const float *>();
+  const float *DPtr = D.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 4; j++) {
       float tmp = 0;
@@ -387,7 +387,7 @@ TEST_F(TestTensorMath, MultCpp) {
   Tensor o(Shape{4, 4});
 
   Mult(p, q, &o);
-  const float *oPtr = o.data<const float *>();
+  const float *oPtr = o.data<float>();
   for (int i = 0; i < 4; i++) {
     for (int j = 0; j < 4; j++) {
       EXPECT_FLOAT_EQ(oPtr[i * 4 + j], x[i]);
@@ -401,7 +401,7 @@ TEST_F(TestTensorMath, AddColumnCpp) {
   t.CopyDataFromHostPtr(x, 3);
   d.CopyDataFromHostPtr(dat1, 6);
   AddColumn(t, &d);
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[i]);
@@ -414,7 +414,7 @@ TEST_F(TestTensorMath, SubColumnCpp) {
   t.CopyDataFromHostPtr(x, 3);
   d.CopyDataFromHostPtr(dat1, 6);
   SubColumn(t, &d);
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[i]);
@@ -428,7 +428,7 @@ TEST_F(TestTensorMath, DivColumnCpp) {
   t.CopyDataFromHostPtr(x, 3);
   d.CopyDataFromHostPtr(dat1, 6);
   DivColumn(t, &d);
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[i]);
@@ -442,7 +442,7 @@ TEST_F(TestTensorMath, AddRowCpp) {
   t.CopyDataFromHostPtr(x, 2);
   d.CopyDataFromHostPtr(dat1, 6);
   AddRow(t, &d);
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[j]);
@@ -456,7 +456,7 @@ TEST_F(TestTensorMath, SubRowCpp) {
   t.CopyDataFromHostPtr(x, 2);
   d.CopyDataFromHostPtr(dat1, 6);
   SubRow(t, &d);
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[j]);
@@ -470,7 +470,7 @@ TEST_F(TestTensorMath, MultRowCpp) {
   t.CopyDataFromHostPtr(x, 2);
   d.CopyDataFromHostPtr(dat1, 6);
   MultRow(t, &d);
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[j]);
@@ -482,7 +482,7 @@ TEST_F(TestTensorMath, SumRowsCpp) {
   Tensor t(Shape{2});
   d.CopyDataFromHostPtr(dat1, 6);
   SumRows(d, &t);
-  const float *tptr = t.data<const float *>();
+  const float *tptr = t.data<float>();
   for (int i = 0; i < 2; i++) {
     float tmp = 0;
     for (int j = 0; j < 3; j++) {
@@ -496,7 +496,7 @@ TEST_F(TestTensorMath, SumColumnsCpp) {
   Tensor t(Shape{3});
   d.CopyDataFromHostPtr(dat1, 6);
   SumColumns(d, &t);
-  const float *tptr = t.data<const float *>();
+  const float *tptr = t.data<float>();
   for (int i = 0; i < 3; i++) {
     float tmp = 0;
     for (int j = 0; j < 2; j++) {
@@ -525,7 +525,7 @@ TEST_F(TestTensorMath, MultCuda) {
   d.CopyDataFromHostPtr(dat1, 6);
   Tensor C = Mult(d, t);
   C.ToHost();
-  const float *xptr = C.data<const float *>();
+  const float *xptr = C.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       float tmp = 0;
@@ -541,7 +541,7 @@ TEST_F(TestTensorMath, MultCuda) {
   s.CopyDataFromHostPtr(y, 8);
   Tensor D = Mult(d, s.T());
   D.ToHost();
-  const float *DPtr = D.data<const float *>();
+  const float *DPtr = D.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 4; j++) {
       float tmp = 0;
@@ -559,7 +559,7 @@ TEST_F(TestTensorMath, MultCuda) {
 
   Mult(p, q, &o);
   o.ToHost();
-  const float *oPtr = o.data<const float *>();
+  const float *oPtr = o.data<float>();
   for (int i = 0; i < 4; i++) {
     for (int j = 0; j < 4; j++) {
       EXPECT_FLOAT_EQ(oPtr[i * 4 + j], x[i]);
@@ -576,7 +576,7 @@ TEST_F(TestTensorMath, AddColumnCuda) {
   d.ToDevice(&dev);
   AddColumn(t, &d);
   d.ToHost();
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[i]);
@@ -593,7 +593,7 @@ TEST_F(TestTensorMath, SubColumnCuda) {
   d.ToDevice(&dev);
   SubColumn(t, &d);
   d.ToHost();
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[i]);
@@ -607,7 +607,7 @@ TEST_F(TestTensorMath, MultColumnCpp) {
   t.CopyDataFromHostPtr(x, 3);
   d.CopyDataFromHostPtr(dat1, 6);
   MultColumn(t, &d);
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[i]);
@@ -624,7 +624,7 @@ TEST_F(TestTensorMath, MultColumnCuda) {
   d.ToDevice(&dev);
   MultColumn(t, &d);
   d.ToHost();
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[i]);
@@ -640,7 +640,7 @@ TEST_F(TestTensorMath, DivColumnCuda) {
   d.ToDevice(&dev);
   DivColumn(t, &d);
   d.ToHost();
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[i]);
@@ -656,7 +656,7 @@ TEST_F(TestTensorMath, AddRowCuda) {
   d.ToDevice(&dev);
   AddRow(t, &d);
   d.ToHost();
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[j]);
@@ -672,7 +672,7 @@ TEST_F(TestTensorMath, SubRowCuda) {
   d.ToDevice(&dev);
   SubRow(t, &d);
   d.ToHost();
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[j]);
@@ -688,7 +688,7 @@ TEST_F(TestTensorMath, MultRowCuda) {
   d.ToDevice(&dev);
   MultRow(t, &d);
   d.ToHost();
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[j]);
@@ -702,7 +702,7 @@ TEST_F(TestTensorMath, DivRowCpp) {
   t.CopyDataFromHostPtr(x, 2);
   d.CopyDataFromHostPtr(dat1, 6);
   DivRow(t, &d);
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[j]);
@@ -719,7 +719,7 @@ TEST_F(TestTensorMath, DivRowCuda) {
   d.ToDevice(&dev);
   DivRow(t, &d);
   d.ToHost();
-  const float *xptr = d.data<const float *>();
+  const float *xptr = d.data<float>();
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 2; j++) {
       EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[j]);
@@ -733,7 +733,7 @@ TEST_F(TestTensorMath, SumRowsCuda) {
   d.ToDevice(&dev);
   SumRows(d, &t);
   t.ToHost();
-  const float *tptr = t.data<const float *>();
+  const float *tptr = t.data<float>();
   for (int i = 0; i < 2; i++) {
     float tmp = 0;
     for (int j = 0; j < 3; j++) {
@@ -749,7 +749,7 @@ TEST_F(TestTensorMath, SumColumnCuda) {
   d.ToDevice(&dev);
   SumColumns(d, &t);
   t.ToHost();
-  const float *tptr = t.data<const float *>();
+  const float *tptr = t.data<float>();
   for (int i = 0; i < 3; i++) {
     float tmp = 0;
     for (int j = 0; j < 2; j++) {


Mime
View raw message