使用 SequenceFile 时出现 NullPointerException

goucqfw6  于 2021-06-03  发布在  Hadoop
关注(0)|答案(0)|浏览(283)

我正在做一个涉及 Hadoop 和 Mahout 库的项目。我需要使用 SequenceFile.Writer 将数据写入文件,但在创建 SequenceFile 写入器时会抛出 NullPointerException。为了更清楚地说明问题,我编写了一段可以复现该问题的测试代码,并附上了错误信息。我还附上了生成示例数据的代码。
首先,我在 MyUtil 类中按照若干正态分布生成示例数据。然后(在 Test 类中)把这些样本数据传给 Mahout 的 Canopy 聚类库进行 Canopy 聚类。接着尝试使用 SequenceFile.Writer 将 Canopy 聚类算法生成的中心点写入文件。空指针异常就出现在这一步(创建 SequenceFile 写入器时)。
事先谢谢你的帮助。

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.mahout.clustering.canopy.Canopy;
import org.apache.mahout.clustering.canopy.CanopyClusterer;
import org.apache.mahout.clustering.canopy.CanopyDriver;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.Vector;

 /**
  * Reproduction driver: generates three groups of 2-D sample points, runs
  * Mahout canopy clustering over them, and writes each canopy's id and center
  * to a Hadoop {@link SequenceFile} at {@code testData/points/file1}.
  */
 public class Test {

   /**
    * Runs the clustering and writes the resulting canopy centers.
    *
    * @param args unused
    * @throws IOException if the output directory cannot be created or the
    *     sequence file cannot be written
    */
   public static void main(String[] args) throws IOException {
     List<Vector> sampleData = new ArrayList<Vector>();
     MyUtil.generateSamples(sampleData, 400, 1, 1, 2);
     MyUtil.generateSamples(sampleData, 400, 1, 0, .5);
     MyUtil.generateSamples(sampleData, 400, 0, 2, .1);

     @SuppressWarnings("deprecation")
     List<Canopy> canopies = CanopyClusterer.createCanopies(
         sampleData, new EuclideanDistanceMeasure(), 3.0, 1.5);

     Configuration conf = new Configuration();

     // mkdirs() (not mkdir()) is required: "testData/points" is a nested path,
     // and mkdir() fails when the parent "testData" does not exist yet.
     File testData = new File("testData/points");
     if (!testData.isDirectory() && !testData.mkdirs()) {
       throw new IOException(
           "Could not create output directory " + testData.getAbsolutePath());
     }
     Path path = new Path("testData/points/file1");

     // NOTE(review): the reported NullPointerException is raised inside
     // Shell.runCommand -> ProcessBuilder.start while RawLocalFileSystem
     // shells out for setPermission. That presumably points at the local
     // Hadoop environment (e.g. a missing native helper / HADOOP_HOME) rather
     // than at this code -- confirm against the target platform.

     // try-with-resources guarantees the writer is closed even if append() throws.
     try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
         SequenceFile.Writer.file(path),
         SequenceFile.Writer.keyClass(LongWritable.class),
         SequenceFile.Writer.valueClass(Text.class))) {
       for (Canopy canopy : canopies) {
         String center = canopy.getCenter().toString();
         System.out.println("Canopy ID: " + canopy.getId() + " centers " + center);
         writer.append(new LongWritable(canopy.getId()), new Text(center));
       }
     }
   }
 }

MyUtil.generateSamples 只是用来生成示例数据(其代码附在下面)。上面的代码抛出的错误信息如下:

Exception in thread "main" java.lang.NullPointerException
at java.lang.ProcessBuilder.start(ProcessBuilder.java:1010)
at org.apache.hadoop.util.Shell.runCommand(Shell.java:445)
at org.apache.hadoop.util.Shell.run(Shell.java:418)
at  org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:650)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:739)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:722)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:633)
at org.apache.hadoop.fs.FilterFileSystem.setPermission(FilterFileSystem.java:467)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:456)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:424)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:906)
at org.apache.hadoop.io.SequenceFile$Writer.<init>(SequenceFile.java:1071)
at org.apache.hadoop.io.SequenceFile$RecordCompressWriter.<init>(SequenceFile.java:1371)
at org.apache.hadoop.io.SequenceFile.createWriter(SequenceFile.java:272)
at Test.main(Test.java:39)

To Generate the sample data

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.random.Normal;

public class MyUtil {

  public static void generateSamples(List<Vector> vectors, int num, 
        double mx, double my, double sd){

    Normal xDist = new Normal(mx, sd);
    Normal yDist = new Normal(my, sd);

    for(int i=0; i<num; i++){
        vectors.add(new DenseVector(new double[]{xDist.sample(), yDist.sample()}));
    }
   }

  }
 }

暂无答案!

目前还没有任何答案,快来回答吧!

相关问题