How to run Hadoop HDFS commands from Java code

pw9qyyiw posted on 2021-05-31 in Hadoop

I'm new to Hadoop! How can I run some HDFS commands from Java code? I have been successfully testing MapReduce with Java code and running HDFS commands directly from the Cloudera VM's terminal, but now I would like to learn how to do it from Java code. I have been looking for any material to learn from but haven't found any yet. Thanks!


vql8enpb1#

You can use the FileSystem API in your Java code to perform HDFS operations: https://hadoop.apache.org/docs/r2.8.2/api/org/apache/hadoop/fs/filesystem.html
Sample code is below.

package com.hadoop.FilesystemClasses;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import com.hadoop.Constants.Constants;
public class HdfsFileSystemTasks {

public static Logger logger = Logger.getLogger(HdfsFileSystemTasks.class
        .getName());

// Builds a FileSystem handle from explicit paths to core-site.xml and hdfs-site.xml
public FileSystem configureFilesystem(String coreSitePath,
        String hdfsSitePath) {
    FileSystem fileSystem = null;

    try {
        Configuration conf = new Configuration();
        Path hdfsCoreSitePath = new Path(coreSitePath);
        Path hdfsHDFSSitePath = new Path(hdfsSitePath);
        conf.addResource(hdfsCoreSitePath);
        conf.addResource(hdfsHDFSSitePath);

        fileSystem = FileSystem.get(conf);
        return fileSystem;
    } catch (Exception ex) { 
        ex.printStackTrace();
        return fileSystem;
    }
}

// Copies a file from the local filesystem into HDFS
public String writeToHDFS(FileSystem fileSystem, String sourcePath,
        String destinationPath) {
    try {
        Path inputPath = new Path(sourcePath);
        Path outputPath = new Path(destinationPath);
        fileSystem.copyFromLocalFile(inputPath, outputPath);
        return Constants.SUCCESS;
    } catch (IOException ex) {
        ex.printStackTrace();
        return Constants.FAILURE;
    }
}

// Copies a file from HDFS to the local filesystem
public String readFileFromHdfs(FileSystem fileSystem, String hdfsStorePath,
        String localSystemPath) {
    try {
        Path hdfsPath = new Path(hdfsStorePath);
        Path localPath = new Path(localSystemPath);
        fileSystem.copyToLocalFile(hdfsPath, localPath);
        return Constants.SUCCESS;
    } catch (IOException ex) {
        ex.printStackTrace();
        return Constants.FAILURE;
    }
}

// Deletes a directory in HDFS if it exists
public String deleteHdfsDirectory(FileSystem fileSystem,
        String hdfsStorePath) {
    try {
        Path hdfsPath = new Path(hdfsStorePath);

        if (fileSystem.exists(hdfsPath)) {
            // The boolean flag requests recursive deletion of the directory
            fileSystem.delete(hdfsPath, true);
            logger.info("Directory " + hdfsPath + " deleted successfully");
        } else {
            logger.info("Input directory " + hdfsPath + " does not exist");
        }

        return Constants.SUCCESS;
    } catch (Exception ex) {
        System.out.println("Exception occurred while deleting HDFS directory");
        ex.printStackTrace();
        return Constants.FAILURE;
    }
}

// Deletes a "local" path through the same FileSystem handle; note that unless
// the path carries a file:// scheme it resolves against HDFS, not the local disk
public String deleteLocalDirectory(FileSystem fileSystem,
        String localStorePath) {
    try {
        Path localPath = new Path(localStorePath);

        if (fileSystem.exists(localPath)) {
            fileSystem.delete(localPath, true);
            logger.info("Input directory " + localPath + " deleted successfully");
        } else {
            logger.info("Input directory " + localPath + " does not exist");
        }
        }
        return Constants.SUCCESS;
    } catch (Exception ex) {
        System.out.println("Exception occurred while deleting local directory");
        ex.printStackTrace();
        return Constants.FAILURE;
    }
}

// Releases the FileSystem handle once all operations are done
public void closeFileSystem(FileSystem fileSystem) {
    try {
        fileSystem.close();
    } catch (Exception ex) {
        ex.printStackTrace();
        System.out.println("Unable to close Hadoop filesystem : " + ex);
    }
  }
}

package com.hadoop.FileSystemTasks;

import com.hadoop.Constants.HDFSParameters;
import com.hadoop.Constants.HdfsFilesConstants;
import com.hadoop.Constants.LocalFilesConstants;
import com.hadoop.FilesystemClasses.HdfsFileSystemTasks;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Logger;
public class ExecuteFileSystemTasks {

public static Logger logger = Logger.getLogger(ExecuteFileSystemTasks.class
        .getName());

public static void main(String[] args) {

    HdfsFileSystemTasks hdfsFileSystemTasks = new HdfsFileSystemTasks();
    FileSystem fileSystem = hdfsFileSystemTasks.configureFilesystem(
            HDFSParameters.CORE_SITE_XML_PATH,
            HDFSParameters.HDFS_SITE_XML_PATH);

    logger.info("File System Object {} " + fileSystem);

    String fileWriteStatus = hdfsFileSystemTasks.writeToHDFS(fileSystem,
            LocalFilesConstants.SALES_DATA_LOCAL_PATH,
            HdfsFilesConstants.HDFS_SOURCE_DATA_PATH);

    logger.info("File Write Status{} " + fileWriteStatus);

    String fileReadStatus = hdfsFileSystemTasks.readFileFromHdfs(
            fileSystem, HdfsFilesConstants.HDFS_DESTINATION_DATA_PATH
                    + "/MR_Job_Res2/part-r-00000",
            LocalFilesConstants.MR_RESULTS_LOCALL_PATH);
    logger.info("File Read Status: " + fileReadStatus);

    String deleteDirStatus = hdfsFileSystemTasks.deleteHdfsDirectory(
            fileSystem, HdfsFilesConstants.HDFS_DESTINATION_DATA_PATH
                    + "/MR_Job_Res2");
    logger.info("Delete Directory Status: " + deleteDirStatus);

    hdfsFileSystemTasks.closeFileSystem(fileSystem);

 }

}

dxpyg8gm2#

I think this might help you.
I have used it to execute shell commands successfully:

public class JavaRunShell {
    public static void main(String[] args) {
        try {
            // Replace the placeholder with the full command to execute
            String shpath = "your command";
            Process ps = Runtime.getRuntime().exec(shpath);
            // Block until the command finishes
            ps.waitFor();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
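
For example, a minimal sketch of filling in the placeholder, assuming the hdfs binary is installed and on the PATH of the machine running the JVM (the command shown is only an illustration):

// Assumption: the hdfs CLI is available on the PATH.
// Lists the HDFS root directory via the shell client.
Process ps = Runtime.getRuntime().exec("hdfs dfs -ls /");
ps.waitFor();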

wn9m85ua3#

You can use the FileSystem API in your Java code to interact with HDFS.
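
For instance, a minimal sketch, assuming core-site.xml/hdfs-site.xml are on the classpath (the /tmp/example path is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemApiExample {
    public static void main(String[] args) throws Exception {
        // Picks up core-site.xml / hdfs-site.xml from the classpath
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical path, used only for illustration
        Path path = new Path("/tmp/example");
        System.out.println("Exists: " + fs.exists(path));

        fs.close();
    }
}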


shyt4zoc4#

@dxpyg8gm I tried running your code, but I keep getting an error. This is the error I get:

java.io.IOException: Cannot run program "your": CreateProcess error=2, The system cannot 
 find the file specified
       at java.lang.ProcessBuilder.start(Unknown Source)
       at java.lang.Runtime.exec(Unknown Source)
       at java.lang.Runtime.exec(Unknown Source)
       at java.lang.Runtime.exec(Unknown Source)
       at jrs.main(jrs.java:5)

6jjcrrmo5#

As jagrut mentioned, you can use the FileSystem API in your Java code to interact with HDFS. Below is sample code where I check whether a particular directory exists in HDFS and, if it does, delete it.

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);

Path outputPath = new Path("/user/cloudera/hdfsPath");
if (fs.exists(outputPath)) {
    // The boolean flag deletes the directory recursively
    fs.delete(outputPath, true);
}

You can also refer to these links for further reference:
https://dzone.com/articles/working-with-the-hadoop-file-system-api
https://hadoop.apache.org/docs/r2.8.2/api/org/apache/hadoop/fs/filesystem.html
http://blog.knoldus.com/2017/04/16/working-with-hadoop-filesystem-api/
