Maven:从 Eclipse IDE 运行 Spark 程序 —— pom.xml 构建正常,但运行时报错

waxmsbnn  于 2023-06-05  发布在  Maven
关注(0)|答案(1)|浏览(280)

我在 Ubuntu Linux 22.04 LTS 上的 Eclipse IDE 中运行 Spark 程序,使用的是 Oracle JDK 1.8.0_361。[截图] 我参考了这个帖子(thread),
其中介绍了如何直接从 Eclipse IDE 执行 Spark 程序。普通的 Spark 程序都能正常运行,只有 Spark-SQL 程序会出问题。
出问题的程序如下:

package org.example;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import static org.apache.spark.sql.functions.avg;
import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.max;

/**
 * Spark SQL example: reads {@code src/main/resources/RealEstate.csv}, groups the rows by
 * the {@code Location} column and prints, per location, the average of the
 * {@code Price SQ Ft} column and the maximum of the {@code Price} column, ordered by that
 * average in descending order.
 */
public class HousePriceSolution {

    /** Header name of the price column; cast to {@code long} before aggregation. */
    private static final String PRICE = "Price";
    /** Header name of the price-per-square-foot column; cast to {@code long} before aggregation. */
    private static final String PRICE_SQ_FT = "Price SQ Ft";

    /**
     * Runs the aggregation on a single-threaded local Spark master and prints the result.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if any step of the job fails
     */
    public static void main(String[] args) throws Exception 
    {
        // Only ERROR and above from loggers under "org" reach the console.
        Logger.getLogger("org").setLevel(Level.ERROR);

        // SparkSession is Closeable: try-with-resources stops the session (and its
        // underlying context) even when reading or aggregating throws.
        try (SparkSession session = SparkSession.builder()
                .appName("HousePriceSolution")
                .master("local[1]")
                .getOrCreate()) {

            // The first CSV line is treated as the header; every column is read as a string.
            Dataset<Row> realEstate = session.read()
                    .option("header", "true")
                    .csv("src/main/resources/RealEstate.csv");

            // Cast the two numeric columns so avg/max operate on numbers, not strings.
            Dataset<Row> castedRealEstate = realEstate
                    .withColumn(PRICE, col(PRICE).cast("long"))
                    .withColumn(PRICE_SQ_FT, col(PRICE_SQ_FT).cast("long"));

            // The aggregated column is referenced by its generated name "avg(<column>)".
            castedRealEstate.groupBy("Location")
                            .agg(avg(PRICE_SQ_FT), max(PRICE))
                            .orderBy(col("avg(" + PRICE_SQ_FT + ")").desc())
                            .show();
        }
    }
}

下面是相应的pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <!-- Maven build descriptor: a jar project compiled for Java 1.8 that depends on Spark 3.4.0 artifacts. -->
  <modelVersion>4.0.0</modelVersion>
  <groupId>sparkwordcount</groupId>
  <artifactId>sparkwordcount</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>"Spark Word Count"</name>
  
  <!-- Additional repository for dependencies, reached over plain HTTP.
       NOTE(review): confirm this host is still reachable and still required. -->
  <repositories>
    <repository>
      <id>scala-tools.org</id>
      <name>Scala-tools Maven2 Repository</name>
      <url>http://scala-tools.org/repo-releases</url>
    </repository>
  </repositories>

  <!-- Same host, registered for plugin resolution as well. -->
  <pluginRepositories>
    <pluginRepository>
      <id>scala-tools.org</id>
      <name>Scala-tools Maven2 Repository</name>
      <url>http://scala-tools.org/repo-releases</url>
    </pluginRepository>
  </pluginRepositories>

  <!-- Source files and generated reports are both treated as UTF-8. -->
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
  </properties>

  <build>
    <plugins>
<!-- this plugin is for scala code. It uses the version of the Scala library dependency to pick the Scala version -->
<!--
      <plugin>
    <groupId>net.alchim31.maven</groupId>
    <artifactId>scala-maven-plugin</artifactId>
    <version>4.4.0</version>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      -->
    
<!-- this plugin is for java code. the source and target versions are Java versions -->
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.8.1</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
    </plugins>  
  </build>

  <dependencies>
     
    <!-- NOTE(review): scala-library is pinned to 2.12.17 here, while every Spark artifact
         below carries the _2.13 suffix (built for Scala 2.13). The reported
         NoClassDefFoundError for scala/$less$colon$less is consistent with this mismatch;
         both sides should be aligned on one Scala binary version. -->
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>2.12.17</version>
    </dependency>
   
    <!-- Spark core, Scala 2.13 build. NOTE(review): scope is "provided"; confirm the IDE
         launch configuration actually puts provided-scope jars on the runtime classpath. -->
    <dependency>
      <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.13</artifactId>
        <version>3.4.0</version>
      <scope>provided</scope>
    </dependency>
    <!-- the following aren't needed for the word count demo, but
     will be for more complex things.
    -->
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-text</artifactId>
      <version>1.6</version>
    </dependency>
    <!-- Spark SQL, Scala 2.13 build; supplies the SparkSession / Dataset API used by the program. -->
    <dependency>
      <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.13</artifactId>
    <version>3.4.0</version>
      <scope>provided</scope>
    </dependency>
    <!-- Spark Streaming, Scala 2.13 build. -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_2.13</artifactId>
    <version>3.4.0</version>
      <scope>provided</scope>
    </dependency>
    
    <dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.12.0</version>
</dependency>
<!-- the following artifacts are also available, depending upon
     what you're doing. They would use the same groupId and version as the ones above:
 spark-mllib
 spark-hive
 spark-catalyst
 spark-streaming-kafka
 spark-repl
 spark-graphx
-->
  </dependencies>
</project>

构建一切正常,但运行时出现以下错误:

Exception in thread "main" java.lang.NoClassDefFoundError: scala/$less$colon$less
    at org.example.HousePriceSolution.main(HousePriceSolution.java:22)
Caused by: java.lang.ClassNotFoundException: scala.$less$colon$less
    at java.net.URLClassLoader.findClass(URLClassLoader.java:387)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:355)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
    ... 1 more

我也试过了,但没有成功

5anewei6

5anewei61#

看起来你的 Scala、Spark 和 KafkaClient 之间存在版本不匹配。你的 pom.xml 中包含以下内容:

<!-- Scala standard library pinned to the 2.12.x line -->
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>2.12.17</version>
    </dependency>

    <!-- Spark artifacts built for Scala 2.13.x (see the _2.13 suffix), which does not match the 2.12.x library above -->
    <dependency>
      <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.13</artifactId>
        <version>3.4.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.13</artifactId>
    <version>3.4.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_2.13</artifactId>
    <version>3.4.0</version>
      <scope>provided</scope>
    </dependency>

我认为只要把版本统一起来(例如把 scala-library 升级到 2.13.x,或者把各个 Spark 构件改为 _2.12 后缀的版本),问题就能解决。

相关问题