When I run the query below, I get the error that follows.
insert overwrite table mybug select row_number() over (order by clickstream_key) as key, clickstream_key as data from mytest;
It launches roughly 2 mapper tasks and 240 reducer tasks. The job runs smoothly and quickly up to 239 tasks, then the remaining task takes about 3 hours over 4 task attempts before the job fails. The table mytest contains 2 billion records, with a column named "clickstream_key".
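For reference, a simplified sketch of the setup (the mybug schema and the column types here are assumptions for illustration; only clickstream_key is actually named above):

-- Assumed, simplified DDL; mytest holds ~2 billion rows.
CREATE TABLE mytest (clickstream_key BIGINT);
CREATE TABLE mybug  (key BIGINT, data BIGINT);

-- The failing statement: ROW_NUMBER() has an ORDER BY but no PARTITION BY,
-- so the entire table forms a single window partition.
INSERT OVERWRITE TABLE mybug
SELECT ROW_NUMBER() OVER (ORDER BY clickstream_key) AS key,
       clickstream_key AS data
FROM mytest;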
2016-07-24 09:54:37,918 [ERROR] [TezChild] |tez.TezProcessor|: java.lang.OutOfMemoryError: Java heap space
at java.util.Arrays.copyOf(Arrays.java:3181)
at java.util.ArrayList.grow(ArrayList.java:261)
at java.util.ArrayList.ensureExplicitCapacity(ArrayList.java:235)
at java.util.ArrayList.ensureCapacityInternal(ArrayList.java:227)
at java.util.ArrayList.add(ArrayList.java:458)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRowNumber$RowNumberBuffer.incr(GenericUDAFRowNumber.java:73)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRowNumber$GenericUDAFRowNumberEvaluator.iterate(GenericUDAFRowNumber.java:102)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:185)
at org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.evaluateWindowFunction(WindowingTableFunction.java:164)
at org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.iterator(WindowingTableFunction.java:580)
at org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.finishPartition(PTFOperator.java:340)
at org.apache.hadoop.hive.ql.exec.PTFOperator.closeOp(PTFOperator.java:95)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:617)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:631)
at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.close(ReduceRecordProcessor.java:290)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:164)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:344)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:181)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1709)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:172)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:168)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
2016-07-24 09:54:37,920 [INFO] [TezChild] |task.TezTaskRunner|: Encounted an error while executing task: attempt_1469365079244_0001_1_01_000000_0
java.lang.RuntimeException: java.lang.OutOfMemoryError: Java heap space
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:173)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:344)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:181)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1709)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:172)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:168)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.OutOfMemoryError: Java heap space
at java.util.Arrays.copyOf(Arrays.java:3181)
at java.util.ArrayList.grow(ArrayList.java:261)
at java.util.ArrayList.ensureExplicitCapacity(ArrayList.java:235)
at java.util.ArrayList.ensureCapacityInternal(ArrayList.java:227)
at java.util.ArrayList.add(ArrayList.java:458)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRowNumber$RowNumberBuffer.incr(GenericUDAFRowNumber.java:73)
I have tried increasing the datanode heap, the reducer heap, the mapper heap, and hive.tez.java.opts, but nothing has worked. Any pointers would be greatly appreciated. The error says Java heap space, but which Java heap space does it refer to?
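For context, the kind of memory settings I have been raising look like the following (the values are placeholders, not the actual numbers from my cluster; the datanode heap was raised separately via HADOOP_DATANODE_OPTS in hadoop-env.sh):

-- Illustrative values only.
set hive.tez.container.size=8192;          -- Tez container size in MB
set hive.tez.java.opts=-Xmx6144m;          -- JVM options for the Tez child process
set mapreduce.map.memory.mb=8192;          -- mapper container size in MB
set mapreduce.map.java.opts=-Xmx6144m;     -- mapper JVM heap
set mapreduce.reduce.memory.mb=8192;       -- reducer container size in MB
set mapreduce.reduce.java.opts=-Xmx6144m;  -- reducer JVM heap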