如何将mapreduce临时工作目录/tmp更改为其他文件夹

dced5bon  于 2021-05-29  发布在  Hadoop
关注(0)|答案(1)|浏览(558)

我正在使用 Hive,想将 MapReduce 的临时工作目录从 /tmp 更改为其他目录。我试了所有能在网上找到的方法,但都没用。通过 du -h 命令可以看到,/tmp 在 MapReduce 任务运行期间被填满。请问有谁能帮我更改这个目录?
core-site.xml文件

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- core-site.xml: site-specific overrides of the Hadoop core settings. -->
<configuration>
  <!-- Default filesystem URI. fs.defaultFS is the current name of the
       deprecated fs.default.name key; the value itself is unchanged. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <!-- Base directory for Hadoop temporary files, moved off /tmp.
       The user.name placeholder yields one directory per user. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/data/bd/tmp/hadoop-${user.name}</value>
  </property>
  <!-- NOTE(review): dfs.* keys are conventionally kept in hdfs-site.xml;
       confirm this one is meant to live in core-site.xml. -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/data/bd/tmp/hadoop/dfs/journalnode/</value>
  </property>
</configuration>

mapred-site.xml文件

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>

  <!-- Execution framework: run MapReduce jobs on YARN. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <!-- Environment handed to the application master and to map/reduce tasks. -->
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>

  <!-- Memory requested per map / reduce task, in MB. -->
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>2048</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>4096</value>
  </property>

  <!-- Local and temporary directories, all relocated under /data/bd/tmp. -->
  <property>
    <name>mapreduce.cluster.local.dir</name>
    <value>/data/bd/tmp/mapred/local</value>
  </property>
  <property>
    <name>mapreduce.task.tmp.dir</name>
    <value>/data/bd/tmp</value>
  </property>
  <property>
    <name>mapreduce.cluster.temp.dir</name>
    <value>/data/bd/tmp/mapred/temp</value>
  </property>

  <!-- Job staging / system directories.
       NOTE(review): the mapreduce.jobtracker.* keys belong to the classic
       (MRv1) runtime and are presumably unused when the framework is yarn;
       confirm before relying on them. -->
  <property>
    <name>yarn.app.mapreduce.am.staging-dir</name>
    <value>/data/bd/tmp/hadoop-yarn/staging</value>
  </property>
  <property>
    <name>mapreduce.jobtracker.system.dir</name>
    <value>/data/bd/tmp/mapred/system</value>
  </property>
  <property>
    <name>mapreduce.jobtracker.staging.root.dir</name>
    <value>/data/bd/tmp/mapred/staging</value>
  </property>

  <!-- Compress intermediate map output with gzip. -->
  <property>
    <name>mapreduce.map.output.compress</name>
    <value>true</value>
  </property>
  <property>
    <name>mapreduce.map.output.compress.codec</name>
    <value>org.apache.hadoop.io.compress.GzipCodec</value>
  </property>

</configuration>

yarn-site.xml文件

<?xml version="1.0" encoding="UTF-8"?>
<!-- yarn-site.xml: site-specific overrides for the YARN daemons. -->
<configuration>
   <!-- Auxiliary shuffle service used by MapReduce on the NodeManager. -->
   <property>
      <name>yarn.nodemanager.aux-services</name>
      <value>mapreduce_shuffle</value>
   </property>
   <property>
      <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
      <value>org.apache.hadoop.mapred.ShuffleHandler</value>
   </property>
   <!-- Kept on a single line on purpose: a line break inside the value
        would put whitespace in front of the following classpath entry. -->
   <property>
      <name>yarn.application.classpath</name>
      <value>$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*,$HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*</value>
   </property>
   <property>
      <name>yarn.nodemanager.vmem-check-enabled</name>
      <value>false</value>
      <description>Whether virtual memory limits will be enforced for containers</description>
   </property>
   <property>
      <name>yarn.nodemanager.vmem-pmem-ratio</name>
      <value>4</value>
      <description>Ratio between virtual memory to physical memory when setting memory limits for containers</description>
   </property>
   <!-- NOTE(review): presumably resolved on the default filesystem (HDFS),
        not on local disk; confirm against the YARN documentation. -->
   <property>
      <name>yarn.nodemanager.remote-app-log-dir</name>
      <value>/data/bd/tmp/logs</value>
      <description>Where to aggregate application logs to</description>
   </property>
   <property>
      <name>yarn.timeline-service.entity-group-fs-store.active-dir</name>
      <value>/data/bd/tmp/entity-file-history/active</value>
      <description>HDFS path to store active application’s timeline data</description>
   </property>
   <property>
      <name>yarn.timeline-service.entity-group-fs-store.done-dir</name>
      <value>/data/bd/tmp/entity-file-history/done/</value>
      <description>HDFS path to store done application’s timeline data</description>
   </property>
   <property>
      <name>yarn.nodemanager.local-dirs</name>
      <value>/data/bd/tmp/hadoop-ubuntu/nm-local-dir</value>
      <description>List of directories to store localized files</description>
   </property>
</configuration>

hive-site.xml文件

<?xml version="1.0" encoding="UTF-8"?>
<!-- hive-site.xml: site-specific overrides for Hive. -->
<configuration>
    <!-- Metastore connection (MySQL). -->
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://localhost/metastore?createDatabaseIfNotExist=true</value>
        <description>metadata is stored in a MySQL server</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
        <description>MySQL JDBC driver class</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
        <description>user name for connecting to mysql server</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>hive</value>
        <description>password for connecting to mysql server</description>
    </property>

    <!-- Execution and optimizer settings. -->
    <property>
        <name>hive.exec.parallel</name>
        <value>true</value>
        <description>Whether to execute jobs in parallel</description>
    </property>
    <property>
        <name>hive.exec.parallel.thread.number</name>
        <value>8</value>
        <description>How many jobs at most can be executed in parallel</description>
    </property>
    <property>
        <name>hive.cbo.enable</name>
        <value>true</value>
        <description>Flag to control enabling Cost Based Optimizations using Calcite framework.</description>
    </property>

    <!-- Statistics settings. -->
    <property>
        <name>hive.compute.query.using.stats</name>
        <value>true</value>
        <description>
            When set to true Hive will answer a few queries like count(1) purely using stats
            stored in metastore. For basic stats collection turn on the config hive.stats.autogather to true.
            For more advanced stats collection need to run analyze table queries.
        </description>
    </property>
    <property>
        <name>hive.stats.fetch.partition.stats</name>
        <value>true</value>
        <description>
            Annotation of operator tree with statistics information requires partition level basic
            statistics like number of rows, data size and file size. Partition statistics are fetched from
            metastore. Fetching partition statistics for each needed partition can be expensive when the
            number of partitions is high. This flag can be used to disable fetching of partition statistics
            from metastore. When this flag is disabled, Hive will make calls to filesystem to get file sizes
            and will estimate the number of rows from row schema.
        </description>
    </property>
    <property>
        <name>hive.stats.fetch.column.stats</name>
        <value>true</value>
        <description>
            Annotation of operator tree with statistics information requires column statistics.
            Column statistics are fetched from metastore. Fetching column statistics for each needed column
            can be expensive when the number of columns is high. This flag can be used to disable fetching
            of column statistics from metastore.
        </description>
    </property>
    <property>
        <name>hive.stats.autogather</name>
        <value>true</value>
        <description>A flag to gather statistics automatically during the INSERT OVERWRITE command.</description>
    </property>
    <property>
        <name>hive.stats.dbclass</name>
        <value>fs</value>
        <description>
            Expects one of the pattern in [jdbc(:.*), hbase, counter, custom, fs].
            The storage that stores temporary Hive statistics. In filesystem based statistics collection ('fs'),
            each task writes statistics it has collected in a file on the filesystem, which will be aggregated
            after the job has finished. Supported values are fs (filesystem), jdbc:database (where database
            can be derby, mysql, etc.), hbase, counter, and custom as defined in StatsSetupConst.java.
        </description>
    </property>

    <!-- Scratch, cache and log locations, all relocated under /data/bd/tmp.
         NOTE(review): hive.exec.local.scratchdir is not overridden in this
         file; per the Hive documentation its default lives under /tmp, so
         confirm whether it also needs to be redirected. -->
    <property>
        <name>hive.exec.scratchdir</name>
        <value>/data/bd/tmp</value>
        <description>Scratch space for Hive jobs</description>
    </property>
    <property>
        <name>hive.service.metrics.file.location</name>
        <value>/data/bd/tmp/report.json</value>
        <description>For metric class org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics JSON_FILE reporter, the location of local JSON metrics file.  This file will get overwritten at every interval.</description>
    </property>
    <property>
        <name>hive.query.results.cache.directory</name>
        <value>/data/bd/tmp/hive/_resultscache_</value>
        <description>Location of the query results cache directory. Temporary results from queries will be moved to this location.</description>
    </property>
    <property>
        <name>hive.llap.io.allocator.mmap.path</name>
        <value>/data/bd/tmp</value>
        <description>The directory location for mapping NVDIMM/NVMe flash storage into the ORC low-level cache.</description>
    </property>
    <property>
        <name>hive.hbase.snapshot.restoredir</name>
        <value>/data/bd/tmp</value>
        <description>The directory in which to restore the HBase table snapshot.</description>
    </property>
    <property>
        <name>hive.druid.working.directory</name>
        <value>/data/bd/tmp/workingDirectory</value>
        <description>Default hdfs working directory used to store some intermediate metadata</description>
    </property>
    <property>
        <name>hive.querylog.location</name>
        <value>/data/bd/tmp</value>
        <description>Location of Hive run time structured log file</description>
    </property>
</configuration>
wa7juj8i

wa7juj8i1#

对于hadoop 2.7.1
在 $HADOOP_HOME/etc/hadoop/mapred-site.xml 中配置 mapreduce.cluster.local.dir,它还支持以逗号分隔的、位于不同设备上的目录列表。
https://hadoop.apache.org/docs/r2.7.1/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml

相关问题