spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From zhzhan <...@git.apache.org>
Subject [GitHub] spark pull request: [SPARK-2883][SQL] Spark Support for ORCFile fo...
Date Thu, 02 Apr 2015 18:40:36 GMT
Github user zhzhan commented on the pull request:

    https://github.com/apache/spark/pull/5275#issuecomment-89005187
  
    @climberus the following examples demonstrate how to use it:
    
    import org.apache.spark.sql.hive.orc._
    import org.apache.spark.sql._
    //saveAsOrcFile
    /** Row type for the example: one field each of type String, Int, Long, Float,
      * Double, Short, Byte and Boolean.
      */
    case class AllDataTypes(
        stringField: String,
        intField: Int,
        longField: Long,
        floatField: Float,
        doubleField: Double,
        shortField: Short,
        byteField: Byte,
        booleanField: Boolean)
    
        // Build one AllDataTypes row per integer in 0 to 255, deriving every field from
        // that integer, then write the rows out with saveAsOrcFile("orcTest").
        val range = 0 to 255
        val data = sc.parallelize(range).map { x =>
          AllDataTypes(
            stringField = s"$x",
            intField = x,
            longField = x.toLong,
            floatField = x.toFloat,
            doubleField = x.toDouble,
            shortField = x.toShort,
            byteField = x.toByte,
            booleanField = x % 2 == 0)
        }
        data.toDF().saveAsOrcFile("orcTest")
    // Read the "orcTest" ORC file through a HiveContext, register it as the temporary
    // table orcTest, and print every row returned by the filtered query.
    val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)
    val orcTest = hiveContext.orcFile("orcTest")
    orcTest.registerTempTable("orcTest")
    for (row <- hiveContext.sql("SELECT * from orcTest where intfield>185").collect()) {
      println(row)
    }
    
      // Create the temporary table `orc` using the org.apache.spark.sql.hive.orc source
      // with path "orcTest", then print its rows.
      // The statement is assembled from two literals because a plain "..." string
      // literal may not contain a line break.
      hiveContext.sql(
        "create temporary table orc using org.apache.spark.sql.hive.orc " +
          "OPTIONS (path \"orcTest\")")
      hiveContext.sql("select * from orc").collect.foreach(println)
    // Select everything from the temporary table `orc` and save the result as a table
    // named "table" using the org.apache.spark.sql.hive.orc source.
    val table = hiveContext.sql("select * from orc")
    table.saveAsTable("table", "org.apache.spark.sql.hive.orc")

    // Load the ORC files under /user/hive/warehouse/table (presumably where saveAsTable
    // wrote them; this depends on the warehouse configuration), register them as the
    // temporary table hiveOrc, and print the rows.
    val hiveOrc = hiveContext.orcFile("/user/hive/warehouse/table")
    hiveOrc.registerTempTable("hiveOrc")
    hiveContext.sql("select * from hiveOrc").collect().foreach(row => println(row))

    // Write the same query result out again with saveAsOrcFile, to an explicit path.
    table.saveAsOrcFile("/user/ambari-qa/table")
    // Create the temporary table normal_orc_as_source with the
    // org.apache.spark.sql.hive.orc source (path 'saveTable') from a select over `table`.
    // The statement is assembled from two literals because a plain "..." string
    // literal may not contain a line break.
    hiveContext.sql(
      "create temporary table normal_orc_as_source USING org.apache.spark.sql.hive.orc " +
        "OPTIONS (path 'saveTable') as select * from table")


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message