predictionio-user mailing list archives

From 王斌斌 <hellomsg_nore...@163.com>
Subject use hdfs
Date Mon, 14 May 2018 07:41:39 GMT
Hi,
   I want the temporary model data to be stored in HDFS rather than in the local /tmp directory, so I modified the code like this:
import org.apache.predictionio.controller.{PersistentModel, PersistentModelLoader}
import org.apache.predictionio.data.storage.BiMap
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.rdd.RDD

class ALSModel(
    override val rank: Int,
    override val userFeatures: RDD[(Int, Array[Double])],
    override val productFeatures: RDD[(Int, Array[Double])],
    val userStringIntMap: BiMap[String, Int],
    val itemStringIntMap: BiMap[String, Int])
  extends MatrixFactorizationModel(rank, userFeatures, productFeatures)
  with PersistentModel[ALSAlgorithmParams] {

  // Persist each model component as a Spark object file on HDFS
  // instead of the default local /tmp location.
  def save(id: String, params: ALSAlgorithmParams,
      sc: SparkContext): Boolean = {
    sc.parallelize(Seq(rank))
      .saveAsObjectFile(s"hdfs://predictionspark:9000/tmp/${id}/rank")
    userFeatures
      .saveAsObjectFile(s"hdfs://predictionspark:9000/tmp/${id}/userFeatures")
    productFeatures
      .saveAsObjectFile(s"hdfs://predictionspark:9000/tmp/${id}/productFeatures")
    sc.parallelize(Seq(userStringIntMap))
      .saveAsObjectFile(s"hdfs://predictionspark:9000/tmp/${id}/userStringIntMap")
    sc.parallelize(Seq(itemStringIntMap))
      .saveAsObjectFile(s"hdfs://predictionspark:9000/tmp/${id}/itemStringIntMap")
    true
  }

  override def toString = {
    s"userFeatures: [${userFeatures.count()}]" +
    s"(${userFeatures.take(2).toList}...)" +
    s" productFeatures: [${productFeatures.count()}]" +
    s"(${productFeatures.take(2).toList}...)" +
    s" userStringIntMap: [${userStringIntMap.size}]" +
    s"(${userStringIntMap.take(2)}...)" +
    s" itemStringIntMap: [${itemStringIntMap.size}]" +
    s"(${itemStringIntMap.take(2)}...)"
  }
}

object ALSModel
  extends PersistentModelLoader[ALSAlgorithmParams, ALSModel] {
  // Load the model components back from the same HDFS paths.
  def apply(id: String, params: ALSAlgorithmParams,
      sc: Option[SparkContext]) = {
    new ALSModel(
      rank = sc.get.objectFile[Int](
        s"hdfs://predictionspark:9000/tmp/${id}/rank").first,
      userFeatures = sc.get.objectFile(
        s"hdfs://predictionspark:9000/tmp/${id}/userFeatures"),
      productFeatures = sc.get.objectFile(
        s"hdfs://predictionspark:9000/tmp/${id}/productFeatures"),
      userStringIntMap = sc.get.objectFile[BiMap[String, Int]](
        s"hdfs://predictionspark:9000/tmp/${id}/userStringIntMap").first,
      itemStringIntMap = sc.get.objectFile[BiMap[String, Int]](
        s"hdfs://predictionspark:9000/tmp/${id}/itemStringIntMap").first)
  }
}
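
(Side note: to avoid hardcoding the namenode address, I suppose the base path could be read from the algorithm params instead. A rough sketch, inside the same ALSModel class, assuming a hypothetical modelBasePath field that I would add to ALSAlgorithmParams; it is not part of the stock template:)

// Hypothetical variant: the HDFS base path comes from engine.json via
// the algorithm params. modelBasePath is my own addition; the other
// fields match the template's ALSAlgorithmParams.
case class ALSAlgorithmParams(
    rank: Int,
    numIterations: Int,
    lambda: Double,
    seed: Option[Long],
    modelBasePath: String)  // e.g. "hdfs://predictionspark:9000/tmp"

def save(id: String, params: ALSAlgorithmParams,
    sc: SparkContext): Boolean = {
  val base = s"${params.modelBasePath}/${id}"
  sc.parallelize(Seq(rank)).saveAsObjectFile(s"$base/rank")
  userFeatures.saveAsObjectFile(s"$base/userFeatures")
  productFeatures.saveAsObjectFile(s"$base/productFeatures")
  sc.parallelize(Seq(userStringIntMap)).saveAsObjectFile(s"$base/userStringIntMap")
  sc.parallelize(Seq(itemStringIntMap)).saveAsObjectFile(s"$base/itemStringIntMap")
  true
}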


It works.
But then why does pio-env.sh say:
# HADOOP_CONF_DIR: You must configure this if you intend to run PredictionIO
#                  with Hadoop 2.
# HADOOP_CONF_DIR=/opt/hadoop


I didn't set this, and it still works. Can someone explain? And what exactly is HADOOP_CONF_DIR?
All the configuration files under the Hadoop server's etc/ directory?
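
My current guess, for what it's worth: because the code above spells out the full hdfs://host:port URI, Spark knows exactly which namenode to contact and never has to consult the Hadoop configuration, so HADOOP_CONF_DIR is not needed. As I understand it, HADOOP_CONF_DIR should point at the directory holding core-site.xml, hdfs-site.xml and friends (often $HADOOP_HOME/etc/hadoop), and with it set, a bare path would resolve against fs.defaultFS. A minimal sketch of the difference, using the host name from my own setup:

import org.apache.spark.{SparkConf, SparkContext}

object HdfsPathDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("hdfs-path-demo"))
    val rdd = sc.parallelize(Seq(1, 2, 3))

    // Fully qualified URI: the scheme, host and port identify the
    // filesystem directly, so no Hadoop config needs to be consulted.
    rdd.saveAsObjectFile("hdfs://predictionspark:9000/tmp/demo/withUri")

    // Bare path: resolves against fs.defaultFS from core-site.xml,
    // which Spark locates via HADOOP_CONF_DIR; without that config it
    // falls back to the local filesystem.
    rdd.saveAsObjectFile("/tmp/demo/withDefaultFs")

    sc.stop()
  }
}

Is that right?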