From: zhaojing@apache.org
To: commits@singa.incubator.apache.org
Reply-To: dev@singa.incubator.apache.org
Date: Mon, 13 Jun 2016 13:20:26 -0000
Message-Id: <4952e1df8904403ba8141c6006bb64c9@git.apache.org>
Subject: [33/50] [abbrv] incubator-singa git commit: SINGA-192 Implement optimization algorithms for v1

SINGA-192 Implement optimization algorithms for v1

Implement optimization algorithms for SINGA v1, including Nesterov, AdaGrad, and RMSProp, and add unit test cases for each of them. However, only Nesterov passes its test case so far; AdaGrad and RMSProp need the Sqrt() operation for Tensor, which has not been implemented yet.
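For readers skimming the diff below, here is a minimal scalar sketch of the three update rules this commit implements, written to mirror what the new unit tests check. The free-standing functions and their names are illustrative assumptions only; they are not part of the SINGA API, which applies the same updates to Tensor objects through Optimizer::Apply().

#include <cmath>

// Nesterov momentum (see nesterov.cc): keep a running history of lr-scaled
// gradients and apply a look-ahead correction.
float nesterov_step(float value, float grad, float lr, float mom, float* history) {
  float prev = *history;
  *history = *history * mom + grad * lr;
  return value - (*history * (1 + mom) - prev * mom);
}

// AdaGrad (see adagrad.cc): accumulate squared gradients and scale the step
// by their inverse square root; delta avoids division by zero.
float adagrad_step(float value, float grad, float lr, float delta, float* history) {
  *history += grad * grad;
  return value - lr * grad / std::sqrt(*history + delta);
}

// RMSProp (see rmsprop.cc): exponential moving average of squared gradients.
float rmsprop_step(float value, float grad, float lr, float rho, float delta,
                   float* history) {
  *history = *history * rho + grad * grad * (1 - rho);
  return value - lr * grad / std::sqrt(*history + delta);
}

The delta term corresponds to the new OptimizerConf delta field added to model.proto in this commit; the Sqrt() call on Tensor is the operation the commit message notes is still missing for AdaGrad and RMSProp.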
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/178db014
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/178db014
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/178db014

Branch: refs/heads/master
Commit: 178db0144208fd5d5e7de58a575d0ea6300fdfdf
Parents: 01aaf49
Author: WANG Ji
Authored: Sat Jun 11 15:00:18 2016 +0800
Committer: WANG Ji
Committed: Sat Jun 11 16:38:27 2016 +0800

----------------------------------------------------------------------
 include/singa/model/optimizer.h |  43 +++++++++++++++
 src/model/optimizer/adagrad.cc  |  35 ++++++++++++
 src/model/optimizer/nesterov.cc |  43 +++++++++++++++
 src/model/optimizer/rmsprop.cc  |  38 +++++++++++++
 src/proto/model.proto           |   3 +
 test/singa/test_adagrad.cc      |  92 +++++++++++++++++++++++++++++++
 test/singa/test_nesterov.cc     | 101 ++++++++++++++++++++++++++++++++++
 test/singa/test_rmsprop.cc      | 103 +++++++++++++++++++++++++++++++++++
 8 files changed, 458 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/178db014/include/singa/model/optimizer.h
----------------------------------------------------------------------
diff --git a/include/singa/model/optimizer.h b/include/singa/model/optimizer.h
index 7ca9f53..7da1db8 100644
--- a/include/singa/model/optimizer.h
+++ b/include/singa/model/optimizer.h
@@ -168,6 +168,49 @@ class SGD : Optimizer {
   std::function<float(int)> momentum_generator_;
 };
 
+// =============Nesterov======================================================
+class Nesterov : Optimizer {
+ public:
+  void Setup(const OptimizerConf& conf);
+  /// Apply the updating algorithm.
+  void Apply(int step, float lr, const string& name, Tensor* grad,
+             Tensor* value) override;
+
+  /// The argument function returns the momentum value given the current running
+  /// step (i.e., iterations/mini-batches).
+  void SetMomentumGenerator(std::function<float(int)> func) {
+    momentum_generator_ = func;
+  }
+
+ private:
+  std::unordered_map<string, Tensor> history_gradient_;
+  std::function<float(int)> momentum_generator_;
+};
+
+// =============Adagrad=======================================================
+class Adagrad : Optimizer {
+ public:
+  void Setup(const OptimizerConf& conf);
+  /// Apply the updating algorithm.
+  void Apply(int step, float lr, const string& name, Tensor* grad,
+             Tensor* value) override;
+
+ private:
+  std::unordered_map<string, Tensor> history_gradient_;
+  float delta_;
+};
+// =============RMSProp=======================================================
+class RMSProp : Optimizer {
+ public:
+  void Setup(const OptimizerConf& conf);
+  /// Apply the updating algorithm.
+  void Apply(int step, float lr, const string& name, Tensor* grad,
+             Tensor* value) override;
+
+ private:
+  std::unordered_map<string, Tensor> history_gradient_;
+  float delta_, rho_;
+};
 // ============LocalAllReduce for single node multiple workers ==============
 /// Updater for training models on a single node with multiple devices (workers)
 /// All model parameters are partitioned such that each parameter is updated on

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/178db014/src/model/optimizer/adagrad.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/adagrad.cc b/src/model/optimizer/adagrad.cc
new file mode 100644
index 0000000..8bdb07c
--- /dev/null
+++ b/src/model/optimizer/adagrad.cc
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_MODEL_OPTIMIZER_ADAGRAD_H_
+#define SRC_MODEL_OPTIMIZER_ADAGRAD_H_
+#include "singa/model/optimizer.h"
+#include <functional>
+namespace singa {
+
+void Adagrad::Setup(const OptimizerConf& conf) { delta_ = conf.delta(); }
+
+void Adagrad::Apply(int step, float lr, const string& name, Tensor* grad,
+                    Tensor* value) {
+  if (history_gradient_.find(name) == history_gradient_.end())
+    history_gradient_[name].ResetLike(*value);
+  Tensor& history = history_gradient_[name];
+  history += (*grad) * (*grad);
+  (*value) -= (*grad) * lr / Sqrt(history + delta_);
+}
+}  // namespace singa
+#endif  // SRC_MODEL_OPTIMIZER_ADAGRAD_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/178db014/src/model/optimizer/nesterov.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/nesterov.cc b/src/model/optimizer/nesterov.cc
new file mode 100644
index 0000000..95c5531
--- /dev/null
+++ b/src/model/optimizer/nesterov.cc
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_MODEL_OPTIMIZER_NESTEROV_H_
+#define SRC_MODEL_OPTIMIZER_NESTEROV_H_
+#include "singa/model/optimizer.h"
+#include <functional>
+namespace singa {
+
+void Nesterov::Setup(const OptimizerConf& conf) {
+  float m = conf.momentum();
+  SetMomentumGenerator([m](int step) { return m; });
+}
+
+void Nesterov::Apply(int step, float lr, const string& name, Tensor* grad,
+                     Tensor* value) {
+  if (momentum_generator_) {
+    float mom = momentum_generator_(step);
+    if (history_gradient_.find(name) == history_gradient_.end())
+      history_gradient_[name].ResetLike(*value);
+    Tensor& history = history_gradient_[name];
+    Tensor tmp = history;
+    history = history * mom + (*grad) * lr;
+    tmp = history * (1 + mom) - tmp * mom;
+    (*value) -= tmp;
+  }
+}
+}  // namespace singa
+#endif  // SRC_MODEL_OPTIMIZER_NESTEROV_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/178db014/src/model/optimizer/rmsprop.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/rmsprop.cc b/src/model/optimizer/rmsprop.cc
new file mode 100644
index 0000000..cad333c
--- /dev/null
+++ b/src/model/optimizer/rmsprop.cc
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_MODEL_OPTIMIZER_ADAGRAD_H_
+#define SRC_MODEL_OPTIMIZER_ADAGRAD_H_
+#include "singa/model/optimizer.h"
+#include <functional>
+namespace singa {
+
+void RMSProp::Setup(const OptimizerConf& conf) {
+  delta_ = conf.delta();
+  rho_ = conf.delta();
+}
+
+void RMSProp::Apply(int step, float lr, const string& name, Tensor* grad,
+                    Tensor* value) {
+  if (history_gradient_.find(name) == history_gradient_.end())
+    history_gradient_[name].ResetLike(*value);
+  Tensor& history = history_gradient_[name];
+  history = history * rho_ + (*grad) * (*grad) * (1 - rho_);
+  (*value) -= (*grad) * lr / Sqrt(history + delta_);
+}
+}  // namespace singa
+#endif  // SRC_MODEL_OPTIMIZER_ADAGRAD_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/178db014/src/proto/model.proto
----------------------------------------------------------------------
diff --git a/src/proto/model.proto b/src/proto/model.proto
index d368296..c26aa35 100644
--- a/src/proto/model.proto
+++ b/src/proto/model.proto
@@ -86,6 +86,9 @@ message OptimizerConf {
 
   // used by vanilla sgd and nesterov
   optional float momentum = 5 [default = 0.9];
+
+  // delta is used to avoid dividing zero
+  optional float delta = 6 [default = 0.0000001];
 }
 
 message ConstraintConf {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/178db014/test/singa/test_adagrad.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_adagrad.cc b/test/singa/test_adagrad.cc
new file mode 100644
index 0000000..1382467
--- /dev/null
+++ b/test/singa/test_adagrad.cc
@@ -0,0 +1,92 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "gtest/gtest.h"
+#include "singa/model/optimizer.h"
+#include "singa_config.h"
+#include <cmath>
+
+TEST(Adagrad, ApplyCPU) {
+  singa::Adagrad adagrad;
+  float lr = 0.1f;
+  const float v[4] = {0.1, 0.2, 0.3, 0.4};
+  const float g[4] = {0.01, 0.02, 0.03, 0.04};
+
+  singa::Tensor value(singa::Shape{4}), grad(singa::Shape{4});
+  value.CopyDataFromHostPtr(v, 4);
+  grad.CopyDataFromHostPtr(g, 4);
+
+  adagrad.Apply(0, lr, "xx", &grad, &value);
+
+  singa::Tensor v1 = value.Clone();
+  const float* newv1 = v1.data<float>();
+  float history[4];
+  for (int i = 0; i < 4; ++i) history[i] = g[i] * g[i];
+  for (int i = 0; i < 4; ++i)
+    EXPECT_FLOAT_EQ(newv1[i],
+                    v[i] - lr * g[i] / sqrt(history[i] + (float)1E-8));
+
+  grad.CopyDataFromHostPtr(g, 4);
+  adagrad.Apply(1, lr, "xx", &grad, &value);
+  singa::Tensor v2 = value.Clone();
+  const float* newv2 = v2.data<float>();
+  for (int i = 0; i < 4; ++i) history[i] += g[i] * g[i];
+
+  for (int i = 0; i < 4; ++i)
+    EXPECT_FLOAT_EQ(newv2[i],
+                    newv1[i] - lr * g[i] / sqrt(history[i] + (float)1E-8));
+}
+
+#ifdef USE_CUDA
+TEST(Adagrad, ApplyCUDA) {
+  singa::Adagrad adagrad;
+  float lr = 0.1f;
+  const float v[4] = {0.1, 0.2, 0.3, 0.4};
+  const float g[4] = {0.01, 0.02, 0.03, 0.04};
+
+  singa::CudaGPU dev;
+  singa::Tensor value(singa::Shape{4}, &dev), grad(singa::Shape{4}, &dev);
+  value.CopyDataFromHostPtr(v, 4);
+  grad.CopyDataFromHostPtr(g, 4);
+
+  adagrad.Apply(0, lr, "xx", &grad, &value);
+
+  singa::Tensor v1 = value.Clone();
+  v1.ToHost();
+  const float* newv1 = v1.data<float>();
+  float history[4];
+  for (int i = 0; i < 4; ++i) history[i] = g[i] * g[i];
+  for (int i = 0; i < 4; ++i)
+    EXPECT_FLOAT_EQ(newv1[i],
+                    v[i] - lr * g[i] / sqrt(history[i] + (float)1E-8));
+
+  grad.CopyDataFromHostPtr(g, 4);
+  adagrad.Apply(1, lr, "xx", &grad, &value);
+  singa::Tensor v2 = value.Clone();
+  v2.ToHost();
+  const float* newv2 = v2.data<float>();
+  for (int i = 0; i < 4; ++i) history[i] += g[i] * g[i];
+
+  for (int i = 0; i < 4; ++i)
+    EXPECT_FLOAT_EQ(newv2[i],
+                    newv1[i] - lr * g[i] / sqrt(history[i] + (float)1E-8));
+}
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/178db014/test/singa/test_nesterov.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_nesterov.cc b/test/singa/test_nesterov.cc
new file mode 100644
index 0000000..e7083c8
--- /dev/null
+++ b/test/singa/test_nesterov.cc
@@ -0,0 +1,101 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "gtest/gtest.h"
+#include "singa/model/optimizer.h"
+#include "singa_config.h"
+
+TEST(Nesterov, ApplyCPU) {
+  singa::Nesterov nesterov;
+  float lr = 0.1f;
+  auto func = [](int step) { return step <= 5 ? 0.5f : 0.9f; };
+  nesterov.SetMomentumGenerator(func);
+  const float v[4] = {0.1, 0.2, 0.3, 0.4};
+  const float g[4] = {0.01, 0.02, 0.03, 0.04};
+
+  singa::Tensor value(singa::Shape{4}), grad(singa::Shape{4});
+  value.CopyDataFromHostPtr(v, 4);
+  grad.CopyDataFromHostPtr(g, 4);
+
+  nesterov.Apply(0, lr, "xx", &grad, &value);
+
+  singa::Tensor v1 = value.Clone();
+  const float* newv1 = v1.data<float>();
+  float history[4], tmp[4];
+  for (int i = 0; i < 4; ++i) {
+    history[i] = g[i] * lr;
+    tmp[i] = history[i] * (1 + func(0));
+  }
+  for (int i = 0; i < 4; ++i) EXPECT_FLOAT_EQ(newv1[i], v[i] - tmp[i]);
+
+  grad.CopyDataFromHostPtr(g, 4);
+  nesterov.Apply(1, lr, "xx", &grad, &value);
+  singa::Tensor v2 = value.Clone();
+  const float* newv2 = v2.data<float>();
+  for (int i = 0; i < 4; ++i) {
+    tmp[i] = history[i];
+    history[i] = history[i] * func(1) + g[i] * lr;
+    tmp[i] = history[i] * (1 + func(1)) - tmp[i] * func(1);
+  }
+
+  for (int i = 0; i < 4; ++i) EXPECT_FLOAT_EQ(newv2[i], newv1[i] - tmp[i]);
+}
+
+#ifdef USE_CUDA
+TEST(Nesterov, ApplyCUDA) {
+  singa::Nesterov nesterov;
+  float lr = 0.1f;
+  auto func = [](int step) { return step <= 5 ? 0.5f : 0.9f; };
+  nesterov.SetMomentumGenerator(func);
+  const float v[4] = {0.1, 0.2, 0.3, 0.4};
+  const float g[4] = {0.01, 0.02, 0.03, 0.04};
+
+  singa::CudaGPU dev;
+  singa::Tensor value(singa::Shape{4}, &dev), grad(singa::Shape{4}, &dev);
+  value.CopyDataFromHostPtr(v, 4);
+  grad.CopyDataFromHostPtr(g, 4);
+
+  nesterov.Apply(0, lr, "xx", &grad, &value);
+
+  singa::Tensor v1 = value.Clone();
+  v1.ToHost();
+  const float* newv1 = v1.data<float>();
+  float history[4], tmp[4];
+  for (int i = 0; i < 4; ++i) {
+    history[i] = g[i] * lr;
+    tmp[i] = history[i] * (1 + func(0));
+  }
+  for (int i = 0; i < 4; ++i) EXPECT_FLOAT_EQ(newv1[i], v[i] - tmp[i]);
+
+  grad.CopyDataFromHostPtr(g, 4);
+  nesterov.Apply(1, lr, "xx", &grad, &value);
+  singa::Tensor v2 = value.Clone();
+  v2.ToHost();
+  const float* newv2 = v2.data<float>();
+  for (int i = 0; i < 4; ++i) {
+    tmp[i] = history[i];
+    history[i] = history[i] * func(1) + g[i] * lr;
+    tmp[i] = history[i] * (1 + func(1)) - tmp[i] * func(1);
+  }
+
+  for (int i = 0; i < 4; ++i) EXPECT_FLOAT_EQ(newv2[i], newv1[i] - tmp[i]);
+}
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/178db014/test/singa/test_rmsprop.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_rmsprop.cc b/test/singa/test_rmsprop.cc
new file mode 100644
index 0000000..62101f7
--- /dev/null
+++ b/test/singa/test_rmsprop.cc
@@ -0,0 +1,103 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "gtest/gtest.h"
+#include "singa/model/optimizer.h"
+#include "singa_config.h"
+#include <cmath>
+
+TEST(RMSProp, ApplyCPU) {
+  singa::RMSProp rmsprop;
+  float lr = 0.1f;
+  float rho = 0.002f;
+  const float v[4] = {0.1, 0.2, 0.3, 0.4};
+  const float g[4] = {0.01, 0.02, 0.03, 0.04};
+
+  singa::OptimizerConf conf;
+  conf.set_rho(rho);
+
+  singa::Tensor value(singa::Shape{4}), grad(singa::Shape{4});
+  value.CopyDataFromHostPtr(v, 4);
+  grad.CopyDataFromHostPtr(g, 4);
+
+  rmsprop.Setup(conf);
+  rmsprop.Apply(0, lr, "xx", &grad, &value);
+
+  singa::Tensor v1 = value.Clone();
+  const float* newv1 = v1.data<float>();
+  float history[4];
+  for (int i = 0; i < 4; ++i) history[i] = g[i] * g[i] * (1 - rho);
+  for (int i = 0; i < 4; ++i)
+    EXPECT_FLOAT_EQ(newv1[i],
+                    v[i] - lr * g[i] / sqrt(history[i] + (float)1E-8));
+
+  grad.CopyDataFromHostPtr(g, 4);
+  rmsprop.Apply(1, lr, "xx", &grad, &value);
+  singa::Tensor v2 = value.Clone();
+  const float* newv2 = v2.data<float>();
+  for (int i = 0; i < 4; ++i)
+    history[i] += history[i] * rho + g[i] * g[i] * (1 - rho);
+
+  for (int i = 0; i < 4; ++i)
+    EXPECT_FLOAT_EQ(newv2[i],
+                    newv1[i] - lr * g[i] / sqrt(history[i] + (float)1E-8));
+}
+
+#ifdef USE_CUDA
+TEST(RMSProp, ApplyCUDA) {
+  singa::RMSProp rmsprop;
+  float lr = 0.1f;
+  float rho = 0.002f;
+  const float v[4] = {0.1, 0.2, 0.3, 0.4};
+  const float g[4] = {0.01, 0.02, 0.03, 0.04};
+
+  singa::OptimizerConf conf;
+  conf.set_rho(rho);
+
+  singa::CudaGPU dev;
+  singa::Tensor value(singa::Shape{4}, &dev), grad(singa::Shape{4}, &dev);
+  value.CopyDataFromHostPtr(v, 4);
+  grad.CopyDataFromHostPtr(g, 4);
+
+  rmsprop.Apply(0, lr, "xx", &grad, &value);
+
+  singa::Tensor v1 = value.Clone();
+  v1.ToHost();
+  const float* newv1 = v1.data<float>();
+  float history[4];
+  for (int i = 0; i < 4; ++i) history[i] = g[i] * g[i] * (1 - rho);
+  for (int i = 0; i < 4; ++i)
+    EXPECT_FLOAT_EQ(newv1[i],
+                    v[i] - lr * g[i] / sqrt(history[i] + (float)1E-8));
+
+  grad.CopyDataFromHostPtr(g, 4);
+  rmsprop.Apply(1, lr, "xx", &grad, &value);
+  singa::Tensor v2 = value.Clone();
+  v2.ToHost();
+  const float* newv2 = v2.data<float>();
+  for (int i = 0; i < 4; ++i)
+    history[i] += history[i] * rho + g[i] * g[i] * (1 - rho);
+
+  for (int i = 0; i < 4; ++i)
+    EXPECT_FLOAT_EQ(newv2[i],
+                    newv1[i] - lr * g[i] / sqrt(history[i] + (float)1E-8));
+}
+#endif
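As a usage note, the pattern exercised by the tests above is the intended way to drive these optimizers: fill an OptimizerConf, call Setup(), then call Apply() once per step with the learning rate, a parameter name, its gradient, and its value. The snippet below is a condensed sketch of that pattern drawn from the tests; the function name and the "param" label are illustrative only, not additional SINGA API.

#include "singa/model/optimizer.h"

void train_step_sketch() {
  singa::OptimizerConf conf;
  conf.set_momentum(0.9f);  // consumed by Nesterov::Setup()

  singa::Nesterov nesterov;
  nesterov.Setup(conf);

  const float v[4] = {0.1f, 0.2f, 0.3f, 0.4f};
  const float g[4] = {0.01f, 0.02f, 0.03f, 0.04f};
  singa::Tensor value(singa::Shape{4}), grad(singa::Shape{4});
  value.CopyDataFromHostPtr(v, 4);
  grad.CopyDataFromHostPtr(g, 4);

  // One update step: "param" identifies the parameter whose history is tracked.
  nesterov.Apply(0 /*step*/, 0.1f /*lr*/, "param", &grad, &value);
}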