Mailing-List: contact dev-help@spark.apache.org; run by ezmlm
Precedence: bulk
Received-SPF: neutral (athena.apache.org: local policy)
MIME-Version: 1.0
In-Reply-To: 
 <CA+B-+fzQH8H+x8i6yrm+Ar8p+XjX6V2yEFvG4iMjC+=AqKjBFQ@mail.gmail.com>
References: 
 <CA+B-+fzd7hZ1YbCs8PWLgSNbLeL-kyZbG3BhxJbh6QQzCX7NNQ@mail.gmail.com>
	<CAJgQjQ9bzmK8TG-OCKz=zD-V8EDaf05zy4K3rNgdGvNVRb-qSA@mail.gmail.com>
	<CA+B-+fzWF4ZjpyPWH8w4Bu2f-u7kaEbKTbKFUH0H=9JRV3N3-w@mail.gmail.com>
	<CAJgQjQ_aQwaNjDqew1OvfPOCZYoz-jU0iqk0g_zKC2pkqCS21g@mail.gmail.com>
	<CA+B-+fx4sBWg2AobpQm6ELEURg_rP4uTu-qGkymNvYSMioYMTw@mail.gmail.com>
	<CAJgQjQ97pkmAPNqHVRVB2ayE5i_sctM+y1S99kFs3c+dBr5snw@mail.gmail.com>
	<CA+B-+fz-6nS6-mPcEv=zm94ndiieAgniHjaWpro_PqONDoSYRw@mail.gmail.com>
	<CA+B-+fzLDkw=pPmCiCUcW+QEDJ12u8O870pAjz4p6OxNoG+OTQ@mail.gmail.com>
	<CAEYYnxb62V4=AEEM2k82kXfj5JKFJpgN9X38m+6LgmLTit=1ww@mail.gmail.com>
	<CA+B-+fxWRS7WvyN8veK_i0grqHVOwBX9i4s2a0Wfoii25zuzpg@mail.gmail.com>
	<CA+B-+fyrZXBRrQ9qHC-2RH2n0fpus5Cj65Cpwvnv_fmgXG+5Zg@mail.gmail.com>
	<CA+B-+fzQH8H+x8i6yrm+Ar8p+XjX6V2yEFvG4iMjC+=AqKjBFQ@mail.gmail.com>
Date: Sat, 9 Aug 2014 11:01:14 -0700
Message-ID: 
 <CAFZt-ESs4+h7-qGT4PGHcaezmONp3PHYhTkhUyHqyqcV-uiz9g@mail.gmail.com>
Subject: Re: Using mllib-1.1.0-SNAPSHOT on Spark 1.0.1
From: Matt Forbes <matt@tellapart.com>
To: Debasish Das <debasish.das83@gmail.com>
Cc: DB Tsai <dbtsai@dbtsai.com>, Xiangrui Meng <mengxr@gmail.com>,
 dev <dev@spark.apache.org>
Content-Type: multipart/alternative; boundary=047d7bb70c748f48ac0500361b35

--047d7bb70c748f48ac0500361b35
Content-Type: text/plain; charset=UTF-8

I was having this same problem earlier this week and had to include my
changes in the assembly.


On Sat, Aug 9, 2014 at 9:59 AM, Debasish Das <debasish.das83@gmail.com>
wrote:

> I validated that I can reproduce this problem with master as well (without
> adding any of my mllib changes)...
>
> I separated the mllib jar from the assembly, deployed the assembly, and then
> supplied the mllib jar via the --jars option to spark-submit...
>
> I get this error:
>
> 14/08/09 12:49:32 INFO DAGScheduler: Failed to run count at ALS.scala:299
>
> Exception in thread "main" org.apache.spark.SparkException: Job aborted due
> to stage failure: Task 238 in stage 40.0 failed 4 times, most recent
> failure: Lost task 238.3 in stage 40.0 (TID 10002,
> tblpmidn05adv-hdp.tdc.vzwcorp.com): java.lang.ClassCastException:
> scala.Tuple1 cannot be cast to scala.Product2
>
>
>
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$5$$anonfun$apply$4.apply(CoGroupedRDD.scala:159)
>
>         scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
>
>
>
> org.apache.spark.util.collection.ExternalAppendOnlyMap.insertAll(ExternalAppendOnlyMap.scala:138)
>
>
>
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$5.apply(CoGroupedRDD.scala:159)
>
>
>
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$5.apply(CoGroupedRDD.scala:158)
>
>
>
> scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
>
>
>
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>
>         scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>
>
>
> scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
>
>         org.apache.spark.rdd.CoGroupedRDD.compute(CoGroupedRDD.scala:158)
>
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>
>
> org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
>
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>
>
>
> org.apache.spark.rdd.FlatMappedValuesRDD.compute(FlatMappedValuesRDD.scala:31)
>
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>
>
> org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
>
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>
>
>
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$2.apply(CoGroupedRDD.scala:129)
>
>
>
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$2.apply(CoGroupedRDD.scala:126)
>
>
>
> scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
>
>
>
> scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
>
>         scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108)
>
>
>
> scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
>
>         org.apache.spark.rdd.CoGroupedRDD.compute(CoGroupedRDD.scala:126)
>
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>
>
> org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
>
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>
>
>
> org.apache.spark.rdd.FlatMappedValuesRDD.compute(FlatMappedValuesRDD.scala:31)
>
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>
>         org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33)
>
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>
>         org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:61)
>
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:227)
>
>         org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
>
>         org.apache.spark.scheduler.Task.run(Task.scala:54)
>
>
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:199)
>
>
>
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>
>
>
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>
>         java.lang.Thread.run(Thread.java:744)
>
> Driver stacktrace:
>
> at org.apache.spark.scheduler.DAGScheduler.org
>
> $apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1153)
>
> at
>
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1142)
>
> at
>
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1141)
>
> at
>
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>
> at
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1141)
>
> at
>
> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:682)
>
> at
>
> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:682)
>
> at scala.Option.foreach(Option.scala:236)
>
> at
>
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:682)
>
> at
>
> org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1359)
>
> at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
>
> at akka.actor.ActorCell.invoke(ActorCell.scala:456)
>
> at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
>
> at akka.dispatch.Mailbox.run(Mailbox.scala:219)
>
> at
>
> akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
>
> at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
>
> at
>
> scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
>
> at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
>
> at
>
> scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
>
> I will try now with mllib inside the assembly....If that works then
> something is weird here !
>
>
> On Sat, Aug 9, 2014 at 12:46 AM, Debasish Das <debasish.das83@gmail.com>
> wrote:
>
> > Hi Xiangrui,
> >
> > Based on your suggestion I moved core and mllib both to
> 1.1.0-SNAPSHOT...I
> > am still getting class cast exception:
> >
> > Exception in thread "main" org.apache.spark.SparkException: Job aborted
> > due to stage failure: Task 249 in stage 52.0 failed 4 times, most recent
> > failure: Lost task 249.3 in stage 52.0 (TID 10002,
> > tblpmidn06adv-hdp.tdc.vzwcorp.com): java.lang.ClassCastException:
> > scala.Tuple1 cannot be cast to scala.Product2
> >
> > I am running ALS.scala merged with my changes. I will try the mllib jar
> > without my changes next...
> >
> > Can this be due to the fact that my jars are compiled with Java 1.7_55
> but
> > the cluster JRE is at 1.7_45?
> >
> > Thanks.
> >
> > Deb
> >
> >
> >
> >
> > On Wed, Aug 6, 2014 at 12:01 PM, Debasish Das <debasish.das83@gmail.com>
> > wrote:
> >
> >> I did not play with Hadoop settings...everything is compiled with
> >> 2.3.0CDH5.0.2 for me...
> >>
> >> I did try to bump the version number of HBase from 0.94 to 0.96 or 0.98
> >> but there was no profile for CDH in the pom...but that's unrelated to
> this !
> >>
> >>
> >> On Wed, Aug 6, 2014 at 9:45 AM, DB Tsai <dbtsai@dbtsai.com> wrote:
> >>
> >>> One related question, is mllib jar independent from hadoop version
> >>> (doesn't use hadoop api directly)? Can I use mllib jar compiled for one
> >>> version of hadoop and use it in another version of hadoop?
> >>>
> >>> Sent from my Google Nexus 5
> >>> On Aug 6, 2014 8:29 AM, "Debasish Das" <debasish.das83@gmail.com>
> wrote:
> >>>
> >>>> Hi Xiangrui,
> >>>>
> >>>> Maintaining another file will be a pain later so I deployed spark
> 1.0.1
> >>>> without mllib and then my application jar bundles mllib 1.1.0-SNAPSHOT
> >>>> along with the code changes for quadratic optimization...
> >>>>
> >>>> Later the plan is to patch the snapshot mllib with the deployed stable
> >>>> mllib...
> >>>>
> >>>> There are 5 variants that I am experimenting with around 400M ratings
> >>>> (daily data, monthly data I will update in few days)...
> >>>>
> >>>> 1. LS
> >>>> 2. NNLS
> >>>> 3. Quadratic with bounds
> >>>> 4. Quadratic with L1
> >>>> 5. Quadratic with equality and positivity
> >>>>
> >>>> Now the ALS 1.1.0 snapshot runs fine but after completion on this step
> >>>> ALS.scala:311
> >>>>
> >>>> // Materialize usersOut and productsOut.
> >>>> usersOut.count()
> >>>>
> >>>> I am getting from one of the executors: java.lang.ClassCastException:
> >>>> scala.Tuple1 cannot be cast to scala.Product2
> >>>>
> >>>> I am debugging it further but I was wondering if this is due to RDD
> >>>> compatibility within 1.0.1 and 1.1.0-SNAPSHOT ?
> >>>>
> >>>> I have built the jars on my Mac which has Java 1.7.0_55 but the
> deployed
> >>>> cluster has Java 1.7.0_45.
> >>>>
> >>>> The flow runs fine on my localhost spark 1.0.1 with 1 worker. Can that
> >>>> Java
> >>>> version mismatch cause this ?
> >>>>
> >>>> Stack traces are below
> >>>>
> >>>> Thanks.
> >>>> Deb
> >>>>
> >>>>
> >>>> Executor stacktrace:
> >>>>
> >>>>
> >>>>
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$4.apply(CoGroupedRDD.scala:156)
> >>>>
> >>>>
> >>>>
> >>>>
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$4.apply(CoGroupedRDD.scala:154)
> >>>>
> >>>>
> >>>>
> >>>>
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> >>>>
> >>>>
> >>>> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
> >>>>
> >>>>
> >>>> org.apache.spark.rdd.CoGroupedRDD.compute(CoGroupedRDD.scala:154)
> >>>>
> >>>>
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
> >>>>
> >>>>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
> >>>>
> >>>>
> >>>> org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
> >>>>
> >>>>
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
> >>>>
> >>>>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
> >>>>
> >>>>
> >>>>
> >>>>
> org.apache.spark.rdd.FlatMappedValuesRDD.compute(FlatMappedValuesRDD.scala:31)
> >>>>
> >>>>
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
> >>>>
> >>>>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
> >>>>
> >>>>
> >>>> org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
> >>>>
> >>>>
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
> >>>>
> >>>>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
> >>>>
> >>>>
> >>>>
> >>>>
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$2.apply(CoGroupedRDD.scala:126)
> >>>>
> >>>>
> >>>>
> >>>>
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$2.apply(CoGroupedRDD.scala:123)
> >>>>
> >>>>
> >>>>
> >>>>
> scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
> >>>>
> >>>>
> >>>>
> >>>>
> scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
> >>>>
> >>>>
> >>>> scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108)
> >>>>
> >>>>
> >>>>
> >>>>
> scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
> >>>>
> >>>>
> >>>> org.apache.spark.rdd.CoGroupedRDD.compute(CoGroupedRDD.scala:123)
> >>>>
> >>>>
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
> >>>>
> >>>>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
> >>>>
> >>>>
> >>>> org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
> >>>>
> >>>>
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
> >>>>
> >>>>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
> >>>>
> >>>>
> >>>>
> >>>>
> org.apache.spark.rdd.FlatMappedValuesRDD.compute(FlatMappedValuesRDD.scala:31)
> >>>>
> >>>>
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
> >>>>
> >>>>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
> >>>>
> >>>>
> >>>> org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33)
> >>>>
> >>>>
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
> >>>>
> >>>>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
> >>>>
> >>>>
> >>>>
> >>>>
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:158)
> >>>>
> >>>>
> >>>>
> >>>>
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
> >>>>
> >>>>         org.apache.spark.scheduler.Task.run(Task.scala:51)
> >>>>
> >>>>
> >>>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:183)
> >>>>
> >>>>
> >>>>
> >>>>
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
> >>>>
> >>>>
> >>>>
> >>>>
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
> >>>>
> >>>>         java.lang.Thread.run(Thread.java:744)
> >>>>
> >>>> Driver stacktrace:
> >>>>
> >>>> at org.apache.spark.scheduler.DAGScheduler.org
> >>>>
> >>>>
> $apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1044)
> >>>>
> >>>> at
> >>>>
> >>>>
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1028)
> >>>>
> >>>> at
> >>>>
> >>>>
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1026)
> >>>>
> >>>> at
> >>>>
> >>>>
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> >>>>
> >>>> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
> >>>>
> >>>> at
> >>>>
> >>>>
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1026)
> >>>>
> >>>> at
> >>>>
> >>>>
> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:634)
> >>>>
> >>>> at
> >>>>
> >>>>
> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:634)
> >>>>
> >>>> at scala.Option.foreach(Option.scala:236)
> >>>>
> >>>> at
> >>>>
> >>>>
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:634)
> >>>>
> >>>> at
> >>>>
> >>>>
> org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1229)
> >>>>
> >>>> at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
> >>>>
> >>>> at akka.actor.ActorCell.invoke(ActorCell.scala:456)
> >>>>
> >>>> at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
> >>>>
> >>>> at akka.dispatch.Mailbox.run(Mailbox.scala:219)
> >>>>
> >>>> at
> >>>>
> >>>>
> akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
> >>>>
> >>>> at
> scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> >>>>
> >>>> at
> >>>>
> >>>>
> scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> >>>>
> >>>> at
> >>>>
> scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> >>>>
> >>>>  at
> >>>>
> >>>>
> scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> >>>>
> >>>>
> >>>> On Tue, Aug 5, 2014 at 5:59 PM, Debasish Das <
> debasish.das83@gmail.com>
> >>>> wrote:
> >>>>
> >>>> > Hi Xiangrui,
> >>>> >
> >>>> > I used your idea and kept a cherry picked version of ALS.scala in my
> >>>> > application and call it ALSQp.scala...this is an OK workaround for
> now
> >>>> till
> >>>> > a version adds up to master for example...
> >>>> >
> >>>> > For the bug with userClassPathFirst, looks like Koert already found
> >>>> this
> >>>> > issue in the following JIRA:
> >>>> >
> >>>> > https://issues.apache.org/jira/browse/SPARK-1863
> >>>> >
> >>>> > By the way the userClassPathFirst feature is very useful since I am
> >>>> sure
> >>>> > the deployed version of spark on a production cluster will always be
> >>>> the
> >>>> > last stable (core at 1.0.1 in my case) and people would like to
> deploy
> >>>> > SNAPSHOT versions of libraries that build on top of spark core
> (mllib,
> >>>> > streaming etc)...
> >>>> >
> >>>> > Another way is to have a build option that deploys only the core and
> >>>> not
> >>>> > the libraries built upon core...
> >>>> >
> >>>> > Do we have an option like that in make-distribution script ?
> >>>> >
> >>>> > Thanks.
> >>>> > Deb
> >>>> >
> >>>> >
> >>>> > On Tue, Aug 5, 2014 at 10:37 AM, Xiangrui Meng <mengxr@gmail.com>
> >>>> wrote:
> >>>> >
> >>>> >> If you cannot change the Spark jar deployed on the cluster, an easy
> >>>> >> solution would be renaming ALS in your jar. If userClassPathFirst
> >>>> >> doesn't work, could you create a JIRA and attach the log? Thanks!
> >>>> >> -Xiangrui
> >>>> >>
> >>>> >> On Tue, Aug 5, 2014 at 9:10 AM, Debasish Das <
> >>>> debasish.das83@gmail.com>
> >>>> >> wrote:
> >>>> >> > I created the assembly file but still it wants to pick the mllib
> >>>> from
> >>>> >> the
> >>>> >> > cluster:
> >>>> >> >
> >>>> >> > jar tf ./target/ml-0.0.1-SNAPSHOT-jar-with-dependencies.jar |
> grep
> >>>> >> > QuadraticMinimizer
> >>>> >> >
> >>>> >> >
> >>>> org/apache/spark/mllib/optimization/QuadraticMinimizer$$anon$1.class
> >>>> >> >
> >>>> >> > /Users/v606014/dist-1.0.1/bin/spark-submit --master
> >>>> >> > spark://TUSCA09LMLVT00C.local:7077 --class ALSDriver
> >>>> >> > ./target/ml-0.0.1-SNAPSHOT-jar-with-dependencies.jar inputPath
> >>>> >> outputPath
> >>>> >> >
> >>>> >> > Exception in thread "main" java.lang.NoSuchMethodError:
> >>>> >> >
> >>>> >>
> >>>>
> org.apache.spark.mllib.recommendation.ALS.setLambdaL1(D)Lorg/apache/spark/mllib/recommendation/ALS;
> >>>> >> >
> >>>> >> > Now if I force it to use the jar that I gave using
> >>>> >> > spark.files.userClassPathFirst, then it fails on some
> serialization
> >>>> >> > issues...
> >>>> >> >
> >>>> >> > A simple solution is to cherry pick the files I need from spark
> >>>> branch
> >>>> >> to
> >>>> >> > the application branch but I am not sure that's the right thing
> to
> >>>> do...
> >>>> >> >
> >>>> >> > The way userClassPathFirst is behaving, there might be bugs in
> >>>> it...
> >>>> >> >
> >>>> >> > Any suggestions will be appreciated....
> >>>> >> >
> >>>> >> > Thanks.
> >>>> >> > Deb
> >>>> >> >
> >>>> >> >
> >>>> >> > On Sat, Aug 2, 2014 at 11:12 AM, Xiangrui Meng <mengxr@gmail.com
> >
> >>>> >> wrote:
> >>>> >> >>
> >>>> >> >> Yes, that should work. spark-mllib-1.1.0 should be compatible
> with
> >>>> >> >> spark-core-1.0.1.
> >>>> >> >>
> >>>> >> >> On Sat, Aug 2, 2014 at 10:54 AM, Debasish Das <
> >>>> >> debasish.das83@gmail.com>
> >>>> >> >> wrote:
> >>>> >> >> > Let me try it...
> >>>> >> >> >
> >>>> >> >> > Will this be fixed if I generate an assembly file with
> >>>> mllib-1.1.0
> >>>> >> >> > SNAPSHOT
> >>>> >> >> > jar and other dependencies with the rest of the application
> >>>> code ?
> >>>> >> >> >
> >>>> >> >> >
> >>>> >> >> >
> >>>> >> >> > On Sat, Aug 2, 2014 at 10:46 AM, Xiangrui Meng <
> >>>> mengxr@gmail.com>
> >>>> >> wrote:
> >>>> >> >> >>
> >>>> >> >> >> You can try enabling "spark.files.userClassPathFirst". But
> I'm
> >>>> not
> >>>> >> >> >> sure whether it could solve your problem. -Xiangrui
> >>>> >> >> >>
> >>>> >> >> >> On Sat, Aug 2, 2014 at 10:13 AM, Debasish Das
> >>>> >> >> >> <debasish.das83@gmail.com>
> >>>> >> >> >> wrote:
> >>>> >> >> >> > Hi,
> >>>> >> >> >> >
> >>>> >> >> >> > I have deployed spark stable 1.0.1 on the cluster but I
> have
> >>>> new
> >>>> >> code
> >>>> >> >> >> > that
> >>>> >> >> >> > I added in mllib-1.1.0-SNAPSHOT.
> >>>> >> >> >> >
> >>>> >> >> >> > I am trying to access the new code using spark-submit as
> >>>> follows:
> >>>> >> >> >> >
> >>>> >> >> >> > spark-job --class
> >>>> com.verizon.bda.mllib.recommendation.ALSDriver
> >>>> >> >> >> > --executor-memory 16g --total-executor-cores 16 --jars
> >>>> >> >> >> > spark-mllib_2.10-1.1.0-SNAPSHOT.jar,scopt_2.10-3.2.0.jar
> >>>> >> >> >> > sag-core-0.0.1-SNAPSHOT.jar --rank 25 --numIterations 10
> >>>> --lambda
> >>>> >> 1.0
> >>>> >> >> >> > --qpProblem 2 inputPath outputPath
> >>>> >> >> >> >
> >>>> >> >> >> > I can see the jars are getting added to httpServer as
> >>>> expected:
> >>>> >> >> >> >
> >>>> >> >> >> > 14/08/02 12:50:04 INFO SparkContext: Added JAR
> >>>> >> >> >> >
> >>>> >> file:/vzhome/v606014/spark-glm/spark-mllib_2.10-1.1.0-SNAPSHOT.jar
> at
> >>>> >> >> >> >
> >>>> >> http://10.145.84.20:37798/jars/spark-mllib_2.10-1.1.0-SNAPSHOT.jar
> >>>> >> >> >> > with
> >>>> >> >> >> > timestamp 1406998204236
> >>>> >> >> >> >
> >>>> >> >> >> > 14/08/02 12:50:04 INFO SparkContext: Added JAR
> >>>> >> >> >> > file:/vzhome/v606014/spark-glm/scopt_2.10-3.2.0.jar at
> >>>> >> >> >> > http://10.145.84.20:37798/jars/scopt_2.10-3.2.0.jar with
> >>>> >> timestamp
> >>>> >> >> >> > 1406998204237
> >>>> >> >> >> >
> >>>> >> >> >> > 14/08/02 12:50:04 INFO SparkContext: Added JAR
> >>>> >> >> >> > file:/vzhome/v606014/spark-glm/sag-core-0.0.1-SNAPSHOT.jar
> at
> >>>> >> >> >> > http://10.145.84.20:37798/jars/sag-core-0.0.1-SNAPSHOT.jar
> >>>> with
> >>>> >> >> >> > timestamp
> >>>> >> >> >> > 1406998204238
> >>>> >> >> >> >
> >>>> >> >> >> > But the job still can't access code from mllib-1.1.0
> >>>> >> SNAPSHOT.jar...I
> >>>> >> >> >> > think
> >>>> >> >> >> > it's picking up the mllib from cluster which is at 1.0.1...
> >>>> >> >> >> >
> >>>> >> >> >> > Please help. I will ask for a PR tomorrow but internally we
> >>>> want
> >>>> >> to
> >>>> >> >> >> > generate results from the new code.
> >>>> >> >> >> >
> >>>> >> >> >> > Thanks.
> >>>> >> >> >> >
> >>>> >> >> >> > Deb
> >>>> >> >> >
> >>>> >> >> >
> >>>> >> >
> >>>> >> >
> >>>> >>
> >>>> >
> >>>> >
> >>>>
> >>>
> >>
> >
>

--047d7bb70c748f48ac0500361b35--