Usage and code examples of the org.apache.spark.sql.DataFrame.write() method


This article collects a number of Java code examples of the org.apache.spark.sql.DataFrame.write() method and shows how it is used in practice. The examples are drawn from selected projects on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of the DataFrame.write() method are as follows:
Package path: org.apache.spark.sql.DataFrame
Class name: DataFrame
Method name: write

Introduction to DataFrame.write

DataFrame.write() returns a DataFrameWriter, the interface used to save the contents of a DataFrame to external storage such as files (Parquet, JSON, ORC, text), Hive tables, and JDBC databases. The writer is configured fluently through methods such as format(), mode(), option(), and partitionBy(), and the write is triggered by a terminal call such as save(), insertInto(), parquet(), json(), orc(), or jdbc(), as the examples below illustrate.
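
As a quick orientation before the collected snippets, here is a minimal, self-contained sketch of a typical write() call chain against the Spark 1.x Java API. The input path, output path, and the partitioning column "country" are placeholders assumed for illustration.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SaveMode;

public class WriteExample {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("WriteExample");
    JavaSparkContext sc = new JavaSparkContext(conf);
    SQLContext sqlContext = new SQLContext(sc);

    // Load an existing Parquet dataset into a DataFrame (the path is a placeholder).
    DataFrame df = sqlContext.read().parquet("/input/people.parquet");

    // write() returns a DataFrameWriter; configure it fluently, then finish with a
    // terminal call (save/parquet/json/orc/jdbc/insertInto) that actually runs the job.
    df.write()
      .mode(SaveMode.Overwrite)        // replace any existing output
      .partitionBy("country")          // partition the output by a column (assumed to exist)
      .parquet("/output/people_by_country");

    sc.stop();
  }
}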

Code examples

Code example source: Impetus/Kundera

@Override
  public void saveDataFrame(DataFrame dataFrame, Class<?> entityClazz, Map<String, Object> properties)
  {
    // Switch to the target schema, then append the DataFrame rows into the configured table.
    dataFrame.sqlContext().sql("use " + (String) properties.get(KEYSPACE));
    dataFrame.write().insertInto((String) properties.get(TABLE));
  }
}

Code example source: stackoverflow.com

// Load a text file and convert each line to a JavaBean.
JavaRDD<Person> people = sc.textFile("/examples/people.txt")
  .map(Person::parse);

// Apply a schema to an RDD
DataFrame peopleDF = sqlContext.createDataFrame(people, Person.class);
peopleDF.write()
  .format("com.databricks.spark.avro")
  .save("/output");

Code example source: Impetus/Kundera

@Override
public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient)
{
  try
  {
    // Convert the Java list of entities into a Scala Seq and parallelize it into an RDD.
    Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
    ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
    JavaRDD personRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
    // Build a DataFrame from the RDD, using the entity class to derive the schema.
    DataFrame df = sparkClient.sqlContext.createDataFrame(personRDD, m.getEntityClazz());
    sparkClient.sqlContext.sql("use " + m.getSchema());
    if (logger.isDebugEnabled())
    {
      logger.debug("Below are the registered tables with the Hive context: ");
      sparkClient.sqlContext.sql("show tables").show();
    }
    // Append the DataFrame rows into the target table.
    df.write().insertInto(m.getTableName());
    return true;
  }
  catch (Exception e)
  {
    throw new KunderaException("Cannot persist object(s)", e);
  }
}

Code example source: stackoverflow.com

// Map the customers to a custom class, thus adding new properties.
DataFrame storeCustomerReport = sqlContext.createDataFrame(customersWhoHaventOrderedTheProduct.toJavaRDD()
  .map(row -> new StoreCustomerReport(bStoreId.value(), bReportName.getValue(), bReportTime.getValue(), bNumberOfCustomers.getValue(), row.getString(0), row.getString(1), row.getString(2))), StoreCustomerReport.class);

// Save the DataFrame to Cassandra. The original excerpt stops after the keyspace option;
// the data source format, table name, and save() call below are assumed for completeness.
storeCustomerReport.write().mode(SaveMode.Append)
  .format("org.apache.spark.sql.cassandra")
  .option("keyspace", "my_keyspace")
  .option("table", "store_customer_report")   // hypothetical table name
  .save();

Code example source: stackoverflow.com

public class OrcConvert {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("OrcConvert");

    JavaSparkContext jsc = new JavaSparkContext(conf);
    HiveContext hiveContext = new HiveContext(jsc);

    String inputPath = args[0];
    String outputPath = args[1];

    // Read a CSV file (custom quote character, \001 delimiter) into a DataFrame.
    DataFrame inputDf = hiveContext.read().format("com.databricks.spark.csv")
        .option("quote", "'").option("delimiter", "\001")
        .load(inputPath);

    // Write the DataFrame out in ORC format.
    inputDf.write().orc(outputPath);
  }
}

Code example source: stackoverflow.com

SQLContext sqlcontext = new SQLContext(context);
DataFrame outDataFrame = sqlcontext.createDataFrame(finalOutPutRDD, WebHttpOutPutVO.class);

// JDBC connection properties for the target SQL Server instance.
Properties prop = new java.util.Properties();
prop.setProperty("database", "Web_Session");
prop.setProperty("user", "user");
prop.setProperty("password", "pwd@123");
prop.setProperty("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver");

// Append the DataFrame rows into test_table over JDBC.
outDataFrame.write().mode(org.apache.spark.sql.SaveMode.Append).jdbc("jdbc:sqlserver://<Host>:1433", "test_table", prop);

Code example source: XavientInformationSystems/Data-Ingestion-Platform

public void write(List<Row> rows, StructType schema, String tableName) {
    if (CollectionUtils.isNotEmpty(rows))
      sqlContext.createDataFrame(rows, schema).write().mode(SaveMode.Overwrite).jdbc(props.getProperty("url"),
          tableName, props);
  }
}

Code example source: phuonglh/vn.vitk

switch (outputFormat) {
case JSON:
  output.write().json(outputFileName);
  break;
case PARQUET:
  output.write().parquet(outputFileName);
  break;
case TEXT:

Code example source: amidst/toolbox

public static void writeDataToFolder(DataSpark data, String path, SQLContext sqlContext, String formatFile) throws Exception {
  data.getDataFrame(sqlContext).write().mode(SaveMode.Overwrite).format(formatFile).save(path);
}

Code example source: phuonglh/vn.vitk

@Override
  public void saveImpl(String path) {
    // save metadata and params
    DefaultParamsWriter.saveMetadata(instance, path, sc(), 
        DefaultParamsWriter.saveMetadata$default$4(),
        DefaultParamsWriter.saveMetadata$default$5());
    // save model data: markovOrder, numLabels, weights
    Data data = new Data();
    data.setMarkovOrder(contextExtractor.getMarkovOrder().ordinal()+1);
    data.setWeights(weights);
    data.setTagDictionary(tagDictionary);
    List<Data> list = new LinkedList<Data>();
    list.add(data);
    String dataPath = new Path(path, "data").toString();
    sqlContext().createDataFrame(list, Data.class).write().parquet(dataPath);
    // save pipeline model
    try {
      String pipelinePath = new Path(path, "pipelineModel").toString(); 
      pipelineModel.write().overwrite().save(pipelinePath);
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}

Code example source: KeithSSmith/spark-compaction

public void compact(String inputPath, String outputPath) throws IOException {
  this.setCompressionAndSerializationOptions(inputPath, outputPath);
  this.outputCompressionProperties(this.outputCompression);
  
  // Defining Spark Context with a generic Spark Configuration.
  SparkConf sparkConf = new SparkConf().setAppName("Spark Compaction");
  JavaSparkContext sc = new JavaSparkContext(sparkConf);
  
  if (this.outputSerialization.equals(TEXT)) {
    JavaRDD<String> textFile = sc.textFile(this.concatInputPath(inputPath));
    textFile.coalesce(this.splitSize).saveAsTextFile(outputPath);
  } else if (this.outputSerialization.equals(PARQUET)) {
    SQLContext sqlContext = new SQLContext(sc);
    DataFrame parquetFile = sqlContext.read().parquet(this.concatInputPath(inputPath));
    parquetFile.coalesce(this.splitSize).write().parquet(outputPath);
  } else if (this.outputSerialization.equals(AVRO)) {
    // For this to work the files must end in .avro
    // Another issue is that when using compression the compression codec extension is not being added to the file name.
    SQLContext sqlContext = new SQLContext(sc);
    DataFrame avroFile = sqlContext.read().format("com.databricks.spark.avro").load(this.concatInputPath(inputPath));
    avroFile.coalesce(this.splitSize).write().format("com.databricks.spark.avro").save(outputPath);
  } else {
    System.out.println("Did not match any serialization type: text, parquet, or avro.  Recieved: " +
        this.outputSerialization);
  }
}

Code example source: KeithSSmith/spark-compaction

public void compact(String[] args) throws IOException {
  this.setCompressionAndSerializationOptions(this.parseCli(args));
  this.outputCompressionProperties(this.outputCompression);
  
  // Defining Spark Context with a generic Spark Configuration.
  SparkConf sparkConf = new SparkConf().setAppName("Spark Compaction");
  JavaSparkContext sc = new JavaSparkContext(sparkConf);
      
  if (this.outputSerialization.equals(TEXT)) {
    JavaRDD<String> textFile = sc.textFile(this.concatInputPath(inputPath));
    textFile.coalesce(this.splitSize).saveAsTextFile(outputPath);
  } else if (this.outputSerialization.equals(PARQUET)) {
    SQLContext sqlContext = new SQLContext(sc);
    DataFrame parquetFile = sqlContext.read().parquet(this.concatInputPath(inputPath));
    parquetFile.coalesce(this.splitSize).write().parquet(outputPath);
  } else if (this.outputSerialization.equals(AVRO)) {
    // For this to work the files must end in .avro
    SQLContext sqlContext = new SQLContext(sc);
    DataFrame avroFile = sqlContext.read().format("com.databricks.spark.avro").load(this.concatInputPath(inputPath));
    avroFile.coalesce(this.splitSize).write().format("com.databricks.spark.avro").save(outputPath);
  } else {
    System.out.println("Did not match any serialization type: text, parquet, or avro.  Recieved: " +
        this.outputSerialization);
  }
}

Code example source: sectong/SparkToParquet

df.write().partitionBy("ipAddress", "method", "responseCode").mode(SaveMode.Append).parquet(Flags.getInstance().getParquetFile());

Code example source: phuonglh/vn.vitk

df.select("dependency").write().text(outputFileName);
else 
  df.repartition(1).write().json(outputFileName);
