我正在使用配置单元,我想将mapreduce临时工作目录从/tmp更改为其他目录。我试了所有能在网上找到的东西,但都没用。我可以通过du-h命令看到/tmp在mapreduce任务中被填满。请谁帮我换一下目录。
core-site.xml文件
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data/bd/tmp/hadoop-${user.name}</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/bd/tmp/hadoop/dfs/journalnode/</value>
</property>
</configuration>
mapred-site.xml文件
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.cluster.local.dir</name>
<value>/data/bd/tmp/mapred/local</value>
</property>
<property>
<name>mapreduce.task.tmp.dir</name>
<value>/data/bd/tmp</value>
</property>
<property>
<name>mapreduce.cluster.temp.dir</name>
<value>/data/bd/tmp/mapred/temp</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>2048</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>4096</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/data/bd/tmp/hadoop-yarn/staging</value>
</property>
<property>
<name>mapreduce.jobtracker.system.dir</name>
<value>/data/bd/tmp/mapred/system</value>
</property>
<property>
<name>mapreduce.jobtracker.staging.root.dir</name>
<value>/data/bd/tmp/mapred/staging</value>
</property>
<property>
<name>mapreduce.map.output.compress</name>
<value>true</value>
</property>
<property>
<name>mapreduce.map.output.compress.codec</name>
<value>org.apache.hadoop.io.compress.GzipCodec</value>
</property>
</configuration>
yarn-site.xml文件
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.application.classpath</name>
<value>$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*,
$HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
<description>Whether virtual memory limits will be enforced for containers</description>
</property>
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>4</value>
<description>Ratio between virtual memory to physical memory when setting memory limits for containers</description>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/data/bd/tmp/logs</value>
<description>The staging dir used while submitting jobs</description>
</property>
<property>
<name>yarn.timeline-service.entity-group-fs-store.active-dir</name>
<value>/data/bd/tmp/entity-file-history/active</value>
<description>HDFS path to store active application’s timeline data</description>
</property>
<property>
<name>yarn.timeline-service.entity-group-fs-store.done-dir</name>
<value>/data/bd/tmp/entity-file-history/done/</value>
<description>HDFS path to store done application’s timeline data</description>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/data/bd/tmp/hadoop-ubuntu/nm-local-dir</value>
<description>List of directories to store localized files</description>
</property>
</configuration>
配置单元站点.xml
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost/metastore?createDatabaseIfNotExist=true</value>
<description>metadata is stored in a MySQL server</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>MySQL JDBC driver class</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description>user name for connecting to mysql server</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>hive</value>
<description>password for connecting to mysql server</description>
</property>
<property>
<name>hive.exec.parallel</name>
<value>true</value>
<description>Whether to execute jobs in parallel</description>
</property>
<property>
<name>hive.exec.parallel.thread.number</name>
<value>8</value>
<description>How many jobs at most can be executed in parallel</description>
</property>
<property>
<name>hive.cbo.enable</name>
<value>true</value>
<description>Flag to control enabling Cost Based Optimizations using Calcite framework.</description>
</property>
<property>
<name>hive.compute.query.using.stats</name>
<value>true</value>
<description>
When set to true Hive will answer a few queries like count(1) purely using stats
stored in metastore. For basic stats collection turn on the config hive.stats.autogather to true.
For more advanced stats collection need to run analyze table queries.
</description>
</property>
<property>
<name>hive.stats.fetch.partition.stats</name>
<value>true</value>
<description>
Annotation of operator tree with statistics information requires partition level basic
statistics like number of rows, data size and file size. Partition statistics are fetched from
metastore. Fetching partition statistics for each needed partition can be expensive when the
number of partitions is high. This flag can be used to disable fetching of partition statistics
from metastore. When this flag is disabled, Hive will make calls to filesystem to get file sizes
and will estimate the number of rows from row schema.
</description>
</property>
<property>
<name>hive.stats.fetch.column.stats</name>
<value>true</value>
<description>
Annotation of operator tree with statistics information requires column statistics.
Column statistics are fetched from metastore. Fetching column statistics for each needed column
can be expensive when the number of columns is high. This flag can be used to disable fetching
of column statistics from metastore.
</description>
</property>
<property>
<name>hive.stats.autogather</name>
<value>true</value>
<description>A flag to gather statistics automatically during the INSERT OVERWRITE command.</description>
</property>
<property>
<name>hive.stats.dbclass</name>
<value>fs</value>
<description>
Expects one of the pattern in [jdbc(:.*), hbase, counter, custom, fs].
The storage that stores temporary Hive statistics. In filesystem based statistics collection ('fs'),
each task writes statistics it has collected in a file on the filesystem, which will be aggregated
after the job has finished. Supported values are fs (filesystem), jdbc:database (where database
can be derby, mysql, etc.), hbase, counter, and custom as defined in StatsSetupConst.java.
</description>
</property>
<property>
<name>hive.exec.scratchdir</name>
<value>/data/bd/tmp</value>
<description>Scratch space for Hive jobs</description>
</property>
<property>
<name>hive.service.metrics.file.location</name>
<value>/data/bd/tmp/report.json</value>
<description>For metric class org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics JSON_FILE reporter, the location of local JSON metrics file. This file will get overwritten at every interval.</description>
</property>
<property>
<name>hive.query.results.cache.directory</name>
<value>/data/bd/tmp/hive/_resultscache_</value>
<description>unknown</description>
</property>
<property>
<name>hive.llap.io.allocator.mmap.path</name>
<value>/data/bd/tmp</value>
<description>unknown</description>
</property>
<property>
<name>hive.hbase.snapshot.restoredir</name>
<value>/data/bd/tmp</value>
<description>unknown</description>
</property>
<property>
<name>hive.druid.working.directory</name>
<value>/data/bd/tmp//workingDirectory</value>
<description>unknown</description>
</property>
<property>
<name>hive.querylog.location</name>
<value>/data/bd/tmp</value>
<description>logs hive</description>
</property>
</configuration>
1条答案
按热度按时间wa7juj8i1#
对于hadoop 2.7.1
配置
mapreduce.cluster.local.dir
在$HADOOP_HOME/etc/hadoop/mapred-site.xml
,它还支持不同设备上以逗号分隔的目录列表。https://hadoop.apache.org/docs/r2.7.1/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml