Model fit() fails with util.Instrumentation error and java.lang.ArrayStoreException

nbnkbykc · posted 2021-05-27 in Spark
Follow (0) | Answers (0) | Views (235)

I am trying to train a Gaussian mixture model in Spark, but it fails in the fit function. The data loads correctly, and the same code runs fine in Zeppelin and spark-shell; it only fails when the job is submitted with spark-submit. So I suspect some dependency problem, but I'm not sure.
I am using Spark 2.4.4.
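For reference, here is a minimal sketch of the kind of pipeline involved (not my actual ClusterML.scala; the input path, column names, and k are placeholders):

```scala
import org.apache.spark.ml.clustering.GaussianMixture
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.sql.SparkSession

object ClusterML {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("ClusterML")
      .getOrCreate()

    // Load the input data; path and schema are placeholders.
    val raw = spark.read.parquet("/path/to/input")

    // Assemble numeric columns into the "features" vector column
    // that GaussianMixture expects. Column names are hypothetical.
    val assembler = new VectorAssembler()
      .setInputCols(Array("col1", "col2", "col3"))
      .setOutputCol("features")
    val data = assembler.transform(raw)

    // Fit the Gaussian mixture model; this is the call that fails
    // with ArrayStoreException when run via spark-submit.
    val gmm = new GaussianMixture()
      .setK(5)
      .setFeaturesCol("features")
    val model = gmm.fit(data)

    model.gaussians.foreach(println)
    spark.stop()
  }
}
```

The stack trace from spark-submit is below.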

```
ERROR util.Instrumentation: org.apache.spark.SparkDriverExecutionException: Execution error
    at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1377)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2107)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
    at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1364)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
    at org.apache.spark.rdd.RDD.take(RDD.scala:1337)
    at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1378)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
    at org.apache.spark.rdd.RDD.first(RDD.scala:1377)
    at org.apache.spark.ml.clustering.GaussianMixture$$anonfun$fit$1.apply(GaussianMixture.scala:357)
    at org.apache.spark.ml.clustering.GaussianMixture$$anonfun$fit$1.apply(GaussianMixture.scala:340)
    at org.apache.spark.ml.util.Instrumentation$$anonfun$11.apply(Instrumentation.scala:185)
    at scala.util.Try$.apply(Try.scala:192)
    at org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:185)
    at org.apache.spark.ml.clustering.GaussianMixture.fit(GaussianMixture.scala:340)
    at com.[my_scala_class]$.stepThreeCreateClusters(ClusterML.scala:227)
    at com.[my_scala_class]$.createMLClusters(ClusterML.scala:385)
    at com.[my_scala_class]$.main(ClusterML.scala:412)
    at com.[my_scala_class].main(ClusterML.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:497)
    at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
    at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
    at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
    at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
    at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
    at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ArrayStoreException: [Lorg.apache.spark.ml.linalg.Vector;
    at scala.runtime.ScalaRunTime$.array_update(ScalaRunTime.scala:90)
    at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:2082)
    at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:2082)
    at org.apache.spark.scheduler.JobWaiter.taskSucceeded(JobWaiter.scala:59)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1373)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2107)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)

Exception in thread "main" org.apache.spark.SparkDriverExecutionException: Execution error
    at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1377)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2107)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
    at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1364)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
    at org.apache.spark.rdd.RDD.take(RDD.scala:1337)
    at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1378)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
    at org.apache.spark.rdd.RDD.first(RDD.scala:1377)
    at org.apache.spark.ml.clustering.GaussianMixture$$anonfun$fit$1.apply(GaussianMixture.scala:357)
    at org.apache.spark.ml.clustering.GaussianMixture$$anonfun$fit$1.apply(GaussianMixture.scala:340)
    at org.apache.spark.ml.util.Instrumentation$$anonfun$11.apply(Instrumentation.scala:185)
    at scala.util.Try$.apply(Try.scala:192)
    at org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:185)
    at org.apache.spark.ml.clustering.GaussianMixture.fit(GaussianMixture.scala:340)
    at com.[my_scala_class]$.stepThreeCreateClusters(ClusterML.scala:227)
    at com.[my_scala_class]$.createMLClusters(ClusterML.scala:385)
    at com.[my_scala_class]$.main(ClusterML.scala:412)
    at com.[my_scala_class].main(ClusterML.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:497)
    at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
    at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
    at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
    at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
    at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
    at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ArrayStoreException: [Lorg.apache.spark.ml.linalg.Vector;
    at scala.runtime.ScalaRunTime$.array_update(ScalaRunTime.scala:90)
    at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:2082)
    at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:2082)
    at org.apache.spark.scheduler.JobWaiter.taskSucceeded(JobWaiter.scala:59)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1373)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2107)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
```

No answers yet.
