org.apache.hadoop.fs.FileUtil.copyMerge()方法的使用及代码示例

x33g5p2x  于2022-01-19 转载在 其他  
字(9.8k)|赞(0)|评价(0)|浏览(290)

本文整理了Java中org.apache.hadoop.fs.FileUtil.copyMerge()方法的一些代码示例,展示了FileUtil.copyMerge()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。FileUtil.copyMerge()方法的具体详情如下:
包路径:org.apache.hadoop.fs.FileUtil
类名称:FileUtil
方法名:copyMerge

FileUtil.copyMerge介绍

[英]Copy all files in a directory to one output file (merge).
[中]将目录中的所有文件复制到一个输出文件(合并)。

代码示例

代码示例来源:origin: intel-hadoop/HiBench

} finally {
  fs.delete(DfsioeConfig.getInstance().getReportTmp(fsConfig), true);
  FileUtil.copyMerge(fs, DfsioeConfig.getInstance().getReportDir(fsConfig), fs, DfsioeConfig.getInstance().getReportTmp(fsConfig), false, fsConfig, null);
  LOG.info("remote report file " + DfsioeConfig.getInstance().getReportTmp(fsConfig) + " merged.");
  BufferedReader lines = new BufferedReader(new InputStreamReader(new DataInputStream(fs.open(DfsioeConfig.getInstance().getReportTmp(fsConfig)))));

代码示例来源:origin: yahoo/FEL

/**
 * Merges every file under the HDFS directory {@code src} into the single HDFS file {@code dest}.
 *
 * @param src  source directory whose files are concatenated
 * @param dest destination file that receives the merged content
 * @param conf job configuration used to obtain the {@link FileSystem}
 * @return true if the merge succeeded; false if either path does not exist
 * @throws IOException if the file system cannot be reached or the merge fails
 */
public boolean getMergeInHdfs(String src, String dest, JobConf conf) throws IllegalArgumentException, IOException {
  FileSystem fs = FileSystem.get(conf);
  Path srcPath = new Path(src);
  Path dstPath = new Path(dest);
  // Both endpoints must already exist; copyMerge does not create the destination for us.
  if (!fs.exists(srcPath)) {
    LOG.info("Path " + src + " does not exist!");
    return false;
  }
  if (!fs.exists(dstPath)) {
    LOG.info("Path " + dest + " does not exist!");
    return false;
  }
  // deleteSource=false keeps the originals; addString=null inserts no separator between files.
  return FileUtil.copyMerge(fs, srcPath, fs, dstPath, false, conf, null);
}

代码示例来源:origin: org.apache.hadoop/hadoop-common-test

boolean deleteSource = false;
String addString = null;
result = FileUtil.copyMerge(fs, srcPath, fs, dstPath, deleteSource, conf,
  addString);

代码示例来源:origin: LiveRamp/cascading_ext

/**
 * Merges all files in {@code srcDir} into the local file {@code dstFile}, retrying on failure.
 *
 * @param srcDir            HDFS directory whose files are concatenated
 * @param dstFile           local destination file
 * @param numTries          maximum number of merge attempts
 * @param delayBetweenTries milliseconds to sleep between attempts
 * @throws IOException if every attempt fails
 */
@Deprecated
public static void copyMergeToLocal(String srcDir, String dstFile, int numTries, long delayBetweenTries) throws IOException {
 Configuration conf = new Configuration();
 FileSystem hdfs = getFS();
 FileSystem localfs = FileSystem.getLocal(conf);
 while (numTries-- > 0) {
  if (FileUtil.copyMerge(hdfs, new Path(srcDir), localfs, new Path(dstFile), false, conf, null)) {
   return;
  }
  try {
   Thread.sleep(delayBetweenTries);
  } catch (InterruptedException ie) {
   // Restore the interrupt flag so callers further up the stack can still
   // observe the interruption (the original code silently swallowed it).
   Thread.currentThread().interrupt();
   throw new RuntimeException(ie);
  }
 }
 throw new IOException("Could not copyMerge from \"" + srcDir + "\" to \"" + dstFile + "\"!");
}

代码示例来源:origin: edu.umd/cloud9

/**
 * Merges all files matching the glob {@code inputFiles} into the single file {@code outputFile}.
 *
 * @param inputFiles a glob expression of the files to be merged
 * @param outputFile a destination file path
 * @param deleteSource delete source files after merging
 * @param deleteDestinationFileIfExist remove an existing destination before merging
 * @return the path of the merged output file
 * @throws IOException if the destination already exists and may not be removed,
 *         or if the merge itself fails
 */
private static Path mergeTextFiles(Configuration configuration, String inputFiles,
  String outputFile, boolean deleteSource, boolean deleteDestinationFileIfExist)
  throws IOException {
 JobConf conf = new JobConf(configuration, FileMerger.class);
 FileSystem fs = FileSystem.get(conf);
 Path inputPath = new Path(inputFiles);
 Path outputPath = new Path(outputFile);
 if (deleteDestinationFileIfExist) {
  if (fs.exists(outputPath)) {
   // carefully remove the destination file, not recursive
   fs.delete(outputPath, false);
   sLogger.info("Warning: remove destination file since it already exists...");
  }
 } else if (fs.exists(outputPath)) {
  // Bug fix: the original used Preconditions.checkArgument(boolean, Object) with an
  // IOException as the "message" object, which actually throws IllegalArgumentException.
  // Throw the intended IOException directly instead.
  throw new IOException("Destination file already exists...");
 }
 FileUtil.copyMerge(fs, inputPath, fs, outputPath, deleteSource, conf, FILE_CONTENT_DELIMITER);
 sLogger.info("Successfully merge " + inputPath.toString() + " to " + outputFile);
 return outputPath;
}

代码示例来源:origin: org.springframework.data/spring-data-hadoop-core

/**
 * Merges every file matching {@code src} (which may be a glob) into the local
 * file {@code localdst}, optionally inserting a newline between merged files.
 *
 * @param src      source path or glob on the source file system
 * @param localdst destination path on the local file system
 * @param addnl    if true, append a newline between each merged file
 */
public void getmerge(String src, String localdst, boolean addnl) {
    Path srcPath = new Path(src);
    Path dst = new Path(localdst);
    try {
        FileSystem srcFs = getFS(srcPath);
        // Expand the (possibly glob) source pattern into concrete paths.
        Path[] sources = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
        // A null separator means "no delimiter between files".
        String separator = addnl ? "\n" : null;
        FileSystem localFs = FileSystem.getLocal(configuration);
        for (Path source : sources) {
            FileUtil.copyMerge(srcFs, source, localFs, dst, false, configuration, separator);
        }
    } catch (IOException ex) {
        throw new HadoopException("Cannot getmerge " + ex.getMessage(), ex);
    }
}

代码示例来源:origin: org.apache.camel/camel-hdfs2

/**
 * Copies the content at {@code hdfsPath} into a freshly created local temp file
 * and returns a local File handle for it.
 *
 * NOTE(review): the control flow here is unusual — a plain copy is followed by a
 * copyMerge over the same destination, and an IOException from copyMerge is used
 * as a signal to return the single-file copy. Verify against the caller before
 * changing anything.
 */
private File getHfdsFileToTmpFile(String hdfsPath, HdfsConfiguration configuration) {
    try {
      // Last path segment (includes the leading '/') used as the temp-file prefix.
      // NOTE(review): a '/' in a createTempFile prefix looks suspicious — confirm
      // this works on the target platform.
      String fname = hdfsPath.substring(hdfsPath.lastIndexOf('/'));
      File outputDest = File.createTempFile(fname, ".hdfs");
      // createTempFile creates the file; remove it so the copy below can create it fresh.
      if (outputDest.exists()) {
        outputDest.delete();
      }
      HdfsInfo hdfsInfo = HdfsInfoFactory.newHdfsInfo(hdfsPath);
      FileSystem fileSystem = hdfsInfo.getFileSystem();
      // First attempt: treat hdfsPath as a single file and copy it locally.
      FileUtil.copy(fileSystem, new Path(hdfsPath), outputDest, false, fileSystem.getConf());
      try {
        // Second attempt: treat hdfsPath as a directory and merge its files into
        // the same local destination.
        FileUtil.copyMerge(
            fileSystem, // src
            new Path(hdfsPath),
            FileSystem.getLocal(new Configuration()), // dest
            new Path(outputDest.toURI()),
            false, fileSystem.getConf(), null);
      } catch (IOException e) {
        // copyMerge failed — fall back to the single-file copy made above.
        return outputDest;
      }
      // copyMerge succeeded: outputDest is presumed to be a directory here and the
      // merged file is addressed inside it. NOTE(review): confirm — if outputDest
      // is a plain file this returns a non-existent path.
      return new File(outputDest, fname);
    } catch (IOException ex) {
      throw new RuntimeCamelException(ex);
    }
  }
},

代码示例来源:origin: org.apache.crunch/crunch-spark

/**
 * Registers every DistributedCache entry with the Spark context. Plain files are
 * added as-is; directories are first merged into a single "sparkreadable-" sibling
 * file, since Spark's addFile cannot ship a directory.
 */
private void distributeFiles() {
 try {
  URI[] cached = DistributedCache.getCacheFiles(conf);
  if (cached == null) {
   return;
  }
  URI[] distributed = new URI[cached.length];
  for (int idx = 0; idx < cached.length; idx++) {
   Path cachePath = new Path(cached[idx]);
   FileSystem fs = cachePath.getFileSystem(conf);
   if (fs.isFile(cachePath)) {
    // Single files can be handed to Spark directly.
    distributed[idx] = cached[idx];
   } else {
    // Merge the directory contents into one readable sibling file.
    Path merged = new Path(cachePath.getParent(), "sparkreadable-" + cachePath.getName());
    FileUtil.copyMerge(fs, cachePath, fs, merged, false, conf, "");
    distributed[idx] = merged.toUri();
   }
   sparkContext.addFile(distributed[idx].toString());
  }
  DistributedCache.setCacheFiles(distributed, conf);
 } catch (IOException e) {
  throw new RuntimeException("Error retrieving cache files", e);
 }
}

代码示例来源:origin: apache/crunch

/**
 * Pushes all DistributedCache files to the Spark context: files go through
 * unchanged, while directories are merged into a single "sparkreadable-" file
 * first (Spark's addFile only accepts individual files).
 */
private void distributeFiles() {
 try {
  URI[] uris = DistributedCache.getCacheFiles(conf);
  if (uris != null) {
   URI[] rewritten = new URI[uris.length];
   int position = 0;
   for (URI original : uris) {
    Path entry = new Path(original);
    FileSystem fs = entry.getFileSystem(conf);
    URI target;
    if (fs.isFile(entry)) {
     // A regular file needs no preprocessing.
     target = original;
    } else {
     // Collapse the directory into one merged file next to it.
     Path mergeTarget = new Path(entry.getParent(), "sparkreadable-" + entry.getName());
     FileUtil.copyMerge(fs, entry, fs, mergeTarget, false, conf, "");
     target = mergeTarget.toUri();
    }
    rewritten[position++] = target;
    sparkContext.addFile(target.toString());
   }
   DistributedCache.setCacheFiles(rewritten, conf);
  }
 } catch (IOException e) {
  throw new RuntimeException("Error retrieving cache files", e);
 }
}

代码示例来源:origin: org.jvnet.hudson.hadoop/hadoop-core

/**
 * Expands the source file pattern {@code srcf} and merges every matching path
 * into the single local file {@code dst}. The sources are kept.
 *
 * When {@code endline} is true a newline is inserted between merged files,
 * which is useful when concatenating text files.
 *
 * @param srcf a file pattern specifying source files
 * @param dst a destination local file/directory
 * @param endline if an end of line character is added between files
 * @throws IOException on any file-system failure
 * @see org.apache.hadoop.fs.FileSystem#globStatus
 */
void copyMergeToLocal(String srcf, Path dst, boolean endline) throws IOException {
 Path srcPattern = new Path(srcf);
 FileSystem srcFs = srcPattern.getFileSystem(getConf());
 // null separator means nothing is written between merged files.
 String separator = endline ? "\n" : null;
 FileSystem localFs = FileSystem.getLocal(getConf());
 for (Path matched : FileUtil.stat2Paths(srcFs.globStatus(srcPattern), srcPattern)) {
  FileUtil.copyMerge(srcFs, matched, localFs, dst, false, getConf(), separator);
 }
}

代码示例来源:origin: com.facebook.hadoop/hadoop-core

/**
 * Merges all files matching the pattern {@code srcf} into one local file
 * {@code dst}, keeping the sources intact.
 *
 * An optional newline is written between files, which helps when the inputs
 * are text files.
 *
 * @param srcf a file pattern specifying source files
 * @param dst a destination local file/directory
 * @param endline if an end of line character is added between files
 * @throws IOException on any file-system failure
 * @see org.apache.hadoop.fs.FileSystem#globStatus
 */
void copyMergeToLocal(String srcf, Path dst, boolean endline) throws IOException {
 Path globPattern = new Path(srcf);
 FileSystem sourceFs = globPattern.getFileSystem(getConf());
 Path[] matches = FileUtil.stat2Paths(sourceFs.globStatus(globPattern), globPattern);
 for (Path match : matches) {
  if (endline) {
   // Insert "\n" between merged files.
   FileUtil.copyMerge(sourceFs, match, FileSystem.getLocal(getConf()), dst, false, getConf(), "\n");
  } else {
   // No separator between merged files.
   FileUtil.copyMerge(sourceFs, match, FileSystem.getLocal(getConf()), dst, false, getConf(), null);
  }
 }
}

代码示例来源:origin: com.github.jiayuhan-it/hadoop-common

/**
 * Invokes FileUtil.copyMerge for the given source and destination, both
 * resolved against TEST_ROOT_DIR on the local file system. The source is
 * kept (deleteSource=false) and no separator string is inserted between
 * merged files.
 *
 * @param src String non-null source path.
 * @param dst String non-null destination path.
 * @return boolean true if the call to FileUtil.copyMerge was successful.
 * @throws IOException if an I/O error occurs.
 */
private boolean copyMerge(String src, String dst)
  throws IOException {
 Configuration conf = new Configuration();
 FileSystem fs = FileSystem.getLocal(conf);
 try {
  // Resolve both paths under the test root and merge in one call.
  return FileUtil.copyMerge(fs, new Path(TEST_ROOT_DIR, src),
    fs, new Path(TEST_ROOT_DIR, dst), false, conf, null);
 } finally {
  // Always release the local file system handle.
  fs.close();
 }
}

代码示例来源:origin: ch.cern.hadoop/hadoop-common

/**
 * Runs FileUtil.copyMerge between two paths resolved under TEST_ROOT_DIR on
 * the local file system. Sources are preserved and no additional string is
 * placed between merged files.
 *
 * @param src String non-null source path.
 * @param dst String non-null destination path.
 * @return boolean true if the call to FileUtil.copyMerge was successful.
 * @throws IOException if an I/O error occurs.
 */
private boolean copyMerge(String src, String dst)
  throws IOException {
 final Configuration conf = new Configuration();
 final FileSystem localFs = FileSystem.getLocal(conf);
 final boolean merged;
 try {
  final Path source = new Path(TEST_ROOT_DIR, src);
  final Path destination = new Path(TEST_ROOT_DIR, dst);
  // deleteSource=false, addString=null: keep sources, no inter-file separator.
  merged = FileUtil.copyMerge(localFs, source, localFs, destination,
    false, conf, null);
 } finally {
  // Close the handle whether or not the merge succeeded.
  localFs.close();
 }
 return merged;
}

代码示例来源:origin: org.apache.mahout/mahout-mrlegacy

FileUtil.copyMerge(pfs, partsPath, fs, outputPath, true, conf, null);

代码示例来源:origin: org.apache.mahout/mahout-core

FileUtil.copyMerge(pfs, partsPath, fs, outputPath, true, conf, null);

代码示例来源:origin: org.apache.mahout/mahout-mr

FileUtil.copyMerge(pfs, partsPath, fs, outputPath, true, conf, null);

相关文章