I am ingesting data sources (mainly CSV files) into HDFS and then loading the data into Hive tables. To do this, I implemented a Spark application in Scala that computes the delta between the existing Hive table and the CSV file we are trying to ingest.
I recently ran into problems with this Spark application when the data source has many columns (200 to 320).
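For context, here is a simplified sketch of the kind of delta computation involved; the app name, CSV path, and table name below are placeholders, and the real job is more involved:

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("csv-to-hive-delta")        // placeholder app name
  .enableHiveSupport()
  .getOrCreate()

// The problematic sources have 200 to 320 columns.
val incoming = spark.read
  .option("header", "true")
  .csv("hdfs:///staging/source.csv")   // placeholder path

val existing = spark.table("db.target_table")  // placeholder table

// Rows present in the CSV but absent from the Hive table. Comparing
// this many columns is the kind of operation that makes Catalyst emit
// one enormous generated eval() method.
val delta = incoming.except(existing)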
The error is as follows:
org.codehaus.janino.JaninoRuntimeException: Code of method "eval(Lorg/apache/spark/sql/catalyst/InternalRow;)Z" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificPredicate" grows beyond 64 KB
at org.codehaus.janino.CodeContext.makeSpace(CodeContext.java:949)
at org.codehaus.janino.CodeContext.write(CodeContext.java:839)
at org.codehaus.janino.UnitCompiler.writeOpcode(UnitCompiler.java:11081)
at org.codehaus.janino.UnitCompiler.store(UnitCompiler.java:10769)
at org.codehaus.janino.UnitCompiler.store(UnitCompiler.java:10753)
at org.codehaus.janino.UnitCompiler.compileSet2(UnitCompiler.java:5629)
at org.codehaus.janino.UnitCompiler.access$12700(UnitCompiler.java:206)
at org.codehaus.janino.UnitCompiler$17.visitLocalVariableAccess(UnitCompiler.java:5619)
at org.codehaus.janino.UnitCompiler$17.visitLocalVariableAccess(UnitCompiler.java:5611)
at org.codehaus.janino.Java$LocalVariableAccess.accept(Java.java:3675)
at org.codehaus.janino.UnitCompiler.compileSet(UnitCompiler.java:5611)
at org.codehaus.janino.UnitCompiler.compileSet2(UnitCompiler.java:5625)
at org.codehaus.janino.UnitCompiler.access$12200(UnitCompiler.java:206)
at org.codehaus.janino.UnitCompiler$17.visitAmbiguousName(UnitCompiler.java:5614)
at org.codehaus.janino.UnitCompiler$17.visitAmbiguousName(UnitCompiler.java:5611)
at org.codehaus.janino.Java$AmbiguousName.accept(Java.java:3633)
at org.codehaus.janino.UnitCompiler.compileSet(UnitCompiler.java:5611)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:3193)
at org.codehaus.janino.UnitCompiler.access$5100(UnitCompiler.java:206)
at org.codehaus.janino.UnitCompiler$9.visitAssignment(UnitCompiler.java:3143)
at org.codehaus.janino.UnitCompiler$9.visitAssignment(UnitCompiler.java:3139)
at org.codehaus.janino.Java$Assignment.accept(Java.java:3847)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:3139)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2112)
at org.codehaus.janino.UnitCompiler.access$1700(UnitCompiler.java:206)
at org.codehaus.janino.UnitCompiler$6.visitExpressionStatement(UnitCompiler.java:1377)
at org.codehaus.janino.UnitCompiler$6.visitExpressionStatement(UnitCompiler.java:1370)
at org.codehaus.janino.Java$ExpressionStatement.accept(Java.java:2558)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370)
at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1450)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:1436)
at org.codehaus.janino.UnitCompiler.access$1600(UnitCompiler.java:206)
at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1376)
at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1370)
at org.codehaus.janino.Java$Block.accept(Java.java:2471)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2220)
at org.codehaus.janino.UnitCompiler.access$1800(UnitCompiler.java:206)
at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1378)
at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1370)
at org.codehaus.janino.Java$IfStatement.accept(Java.java:2621)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370)
at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1450)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:1436)
at org.codehaus.janino.UnitCompiler.access$1600(UnitCompiler.java:206)
at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1376)
at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1370)
at org.codehaus.janino.Java$Block.accept(Java.java:2471)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2228)
at org.codehaus.janino.UnitCompiler.access$1800(UnitCompiler.java:206)
at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1378)
at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1370)
at org.codehaus.janino.Java$IfStatement.accept(Java.java:2621)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1370)
at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1450)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:2811)
at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1262)
at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1234)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:538)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:890)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:894)
at org.codehaus.janino.UnitCompiler.access$600(UnitCompiler.java:206)
at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:377)
at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:369)
at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1128)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369)
at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1209)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:564)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:420)
at org.codehaus.janino.UnitCompiler.access$400(UnitCompiler.java:206)
at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:374)
at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:369)
at org.codehaus.janino.Java$AbstractPackageMemberClassDeclaration.accept(Java.java:1309)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369)
at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:345)
at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:396)
at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:311)
at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:229)
at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:196)
at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:91)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:950)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1013)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1010)
at org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
at org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
at org.spark_project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
at org.spark_project.guava.cache.LocalCache.get(LocalCache.java:4000)
at org.spark_project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
at org.spark_project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:905)
at org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate$.create(GeneratePredicate.scala:81)
at org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate$.create(GeneratePredicate.scala:40)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:889)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:886)
at org.apache.spark.sql.execution.SparkPlan.newPredicate(SparkPlan.scala:358)
at org.apache.spark.sql.execution.FilterExec$$anonfun$17.apply(basicPhysicalOperators.scala:216)
at org.apache.spark.sql.execution.FilterExec$$anonfun$17.apply(basicPhysicalOperators.scala:215)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndexInternal$1$$anonfun$apply$24.apply(RDD.scala:815)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndexInternal$1$$anonfun$apply$24.apply(RDD.scala:815)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
This logged error is followed by a dump of the generated code for the eval method mentioned above.
I extracted the generated code into a file; it weighs 440 KB, far beyond the 64 KB limit.
Here is the code dump:
/* 001 */ public SpecificPredicate generate(Object[] references) {
/* 002 */ return new SpecificPredicate(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificPredicate extends org.apache.spark.sql.catalyst.expressions.codegen.Predicate {
/* 006 */ private final Object[] references;
/* 007 */
/* 008 */
/* 009 */ public SpecificPredicate(Object[] references) {
/* 010 */ this.references = references;
/* 011 */
/* 012 */ }
/* 013 */
/* 014 */ public void initialize(int partitionIndex) {
/* 015 */
/* 016 */ }
/* 017 */
/* 018 */
/* 019 */
/* 020 */ public boolean eval(InternalRow i) {
/* 021 */
/* 022 */ boolean isNull1 = true;
/* 023 */ boolean value1 = false;
/* 024 */
/* 025 */ boolean isNull7 = i.isNullAt(258);
/* 026 */ int value7 = isNull7 ? -1 : (i.getInt(258));
/* 027 */ boolean isNull5 = false;
/* 028 */ UTF8String value5 = null;
/* 029 */ if (!false && isNull7) {
/* 030 */
/* 031 */ Object obj = ((Expression) references[0]).eval(null);
/* 032 */ UTF8String value8 = (UTF8String) obj;
/* 033 */ isNull5 = false;
/* 034 */ value5 = value8;
/* 035 */ } else {
/* 036 */
/* 037 */ boolean isNull10 = i.isNullAt(258);
/* 038 */ int value10 = isNull10 ? -1 : (i.getInt(258));
/* 039 */ boolean isNull9 = isNull10;
/* 040 */ UTF8String value9 = null;
/* 041 */ if (!isNull10) {
/* 042 */ value9 = UTF8String.fromString(String.valueOf(value10));
/* 043 */ }
/* 044 */ isNull5 = isNull9;
/* 045 */ value5 = value9;
/* 046 */ }
/* 047 */
/* 048 */ boolean isNull13 = i.isNullAt(14);
/* 049 */ UTF8String value13 = isNull13 ? null : (i.getUTF8String(14));
/* 050 */ boolean isNull11 = false;
/* 051 */ UTF8String value11 = null;
/* 052 */ if (!false && isNull13) {
/* 053 */
/* 054 */ Object obj1 = ((Expression) references[1]).eval(null);
/* 055 */ UTF8String value14 = (UTF8String) obj1;
/* 056 */ isNull11 = false;
/* 057 */ value11 = value14;
/* 058 */ } else {
/* 059 */
/* 060 */ boolean isNull15 = i.isNullAt(14);
/* 061 */ UTF8String value15 = isNull15 ? null : (i.getUTF8String(14));
/* 062 */ isNull11 = isNull15;
/* 063 */ value11 = value15;
/* 064 */ }
/* 065 */
// ..... It goes on until the below lines
/* 9816 */ }
/* 9817 */ }
After investigating, I tried to fix the problem by applying the suggestions from the following posts, without success:
https://community.cloudera.com/t5/community-articles/some-spark-thrift-server-errors-with-their-work-arounds/ta-p/246557 (see error 2)
https://community.cloudera.com/t5/community-articles/spark-job-fails-with-below-error-when-byte-code-grows-beyond/ta-p/248494
I also set the Spark configuration value spark.sql.codegen to false, but the error persists.
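For reference, spark.sql.codegen is the old Spark 1.x flag; in Spark 2.x the whole-stage codegen switch is spark.sql.codegen.wholeStage. A minimal sketch of setting it at session creation (the app name is a placeholder; it can equally be passed on spark-submit via --conf):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("csv-to-hive-delta")                    // placeholder app name
  .config("spark.sql.codegen.wholeStage", "false") // disable whole-stage codegen
  .enableHiveSupport()
  .getOrCreate()

That said, judging by the stack trace the failing code comes from GeneratePredicate (via FilterExec and SparkPlan.newPredicate), which in Spark 2.1 appears to run regardless of the whole-stage codegen setting; that may be why these flags do not help here.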
A similar problem is described here: https://issues.apache.org/jira/browse/SPARK-18492
Do you know of a way to fix this without modifying the Spark application's code? (I have seen that adding checkpoints and persisting the data in the code can remove this error, but even then I would not know where to do it. A sketch of what I understand that workaround to look like follows.)
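A minimal sketch of the checkpoint/persist workaround, assuming a SparkSession named spark, a wide intermediate DataFrame named delta, and a hypothetical HDFS checkpoint directory:

import org.apache.spark.storage.StorageLevel

// Hypothetical HDFS path; it must be writable by the job's principal.
spark.sparkContext.setCheckpointDir("hdfs:///tmp/spark-checkpoints")

// Materialize the wide intermediate result and cut its lineage, so that
// subsequent filters are compiled against a fresh, much smaller plan.
val stable = delta
  .persist(StorageLevel.MEMORY_AND_DISK)
  .checkpoint()

Dataset.checkpoint() (available since Spark 2.1) materializes the data and truncates the logical plan, so the predicates Catalyst generates afterwards come from a much smaller expression tree.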
Environment information / context:
I am working in a development environment (see the cluster configuration below).
Spark version: 2.1.1.2.6.1.0-129
HDFS version: 2.7.3.2.6.1.0-129
HDP version: 2.6.1.0-129
How the job is executed:
We build an artifact (jar) with IntelliJ or Jenkins and send it to the server, then run a bash script. This script:
authenticates with the kinit command (we use a keytab and a principal)
launches the spark-submit command with all its parameters, using the generated jar together with its dependencies
spark-submit command parameters:
--master yarn
--deploy-mode cluster
--conf spark.yarn.submit.waitAppCompletion=true
--executor-memory 4g
--executor-cores 5
--files /etc/spark2/conf/hive-site.xml
--class (we specify the main class)
Cluster configuration:
3 compute nodes (workers)
6 vcores each
49 GB of RAM per node