This article collects Java code examples of the org.apache.spark.sql.DataFrame.write() method and shows how DataFrame.write() is used in practice. The examples are drawn from selected projects on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of the DataFrame.write() method:
Package: org.apache.spark.sql
Class name: DataFrame
Method name: write
Description: none provided
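Before the project examples below, here is a minimal, self-contained sketch of the usual write() call chain on a Spark 1.x DataFrame. The class name and the input/output paths are illustrative placeholders; the format and SaveMode values shown are standard options of the DataFrameWriter returned by write().

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SaveMode;

public class DataFrameWriteSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("DataFrameWriteSketch");
        JavaSparkContext sc = new JavaSparkContext(conf);
        SQLContext sqlContext = new SQLContext(sc);

        // Load some existing data; the input path is a placeholder.
        DataFrame df = sqlContext.read().json("/tmp/input.json");

        // write() returns a DataFrameWriter; chain format, save mode, and target.
        df.write()
          .format("parquet")            // e.g. parquet, json, text, jdbc
          .mode(SaveMode.Overwrite)     // Append, Overwrite, Ignore, ErrorIfExists
          .save("/tmp/output.parquet"); // placeholder output path

        sc.stop();
    }
}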
Code example source: Impetus/Kundera
@Override
public void saveDataFrame(DataFrame dataFrame, Class<?> entityClazz, Map<String, Object> properties)
{
    dataFrame.sqlContext().sql("use " + (String) properties.get(KEYSPACE));
    dataFrame.write().insertInto((String) properties.get(TABLE));
}
}
Code example source: stackoverflow.com
// Load a text file and convert each line to a JavaBean.
JavaRDD<Person> people = sc.textFile("/examples/people.txt")
.map(Person::parse);
// Apply a schema to an RDD
DataFrame peopleDF = sqlContext.createDataFrame(people, Person.class);
peopleDF.write()
.format("com.databricks.spark.avro")
.save("/output");
Code example source: Impetus/Kundera
@Override
public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient)
{
    try
    {
        Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
        ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
        JavaRDD personRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
        DataFrame df = sparkClient.sqlContext.createDataFrame(personRDD, m.getEntityClazz());
        sparkClient.sqlContext.sql("use " + m.getSchema());
        if (logger.isDebugEnabled())
        {
            logger.info("Below are the registered tables with the hive context: ");
            sparkClient.sqlContext.sql("show tables").show();
        }
        df.write().insertInto(m.getTableName());
        return true;
    }
    catch (Exception e)
    {
        throw new KunderaException("Cannot persist object(s)", e);
    }
}
Code example source: stackoverflow.com
// Map the customers to a custom class, thus adding new properties.
DataFrame storeCustomerReport = sqlContext.createDataFrame(
        customersWhoHaventOrderedTheProduct.toJavaRDD()
                .map(row -> new StoreCustomerReport(bStoreId.value(), bReportName.getValue(), bReportTime.getValue(),
                        bNumberOfCustomers.getValue(), row.getString(0), row.getString(1), row.getString(2))),
        StoreCustomerReport.class);
// Save the DataFrame to Cassandra.
storeCustomerReport.write().mode(SaveMode.Append)
        .option("keyspace", "my_keyspace")
        // The snippet ends here; with the spark-cassandra-connector the chain would
        // typically continue with a "table" option, the "org.apache.spark.sql.cassandra"
        // format, and a final save().
Code example source: stackoverflow.com
public class OrcConvert {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("OrcConvert");
        JavaSparkContext jsc = new JavaSparkContext(conf);
        HiveContext hiveContext = new HiveContext(jsc);
        String inputPath = args[0];
        String outputPath = args[1];
        DataFrame inputDf = hiveContext.read().format("com.databricks.spark.csv")
                .option("quote", "'").option("delimiter", "\001")
                .load(inputPath);
        inputDf.write().orc(outputPath);
    }
}
Code example source: stackoverflow.com
SQLContext sqlcontext = new SQLContext(context);
DataFrame outDataFrame = sqlcontext.createDataFrame(finalOutPutRDD, WebHttpOutPutVO.class);
Properties prop = new java.util.Properties();
prop.setProperty("database", "Web_Session");
prop.setProperty("user", "user");
prop.setProperty("password", "pwd@123");
prop.setProperty("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver");
outDataFrame.write().mode(org.apache.spark.sql.SaveMode.Append)
        .jdbc("jdbc:sqlserver://<Host>:1433", "test_table", prop);
Code example source: XavientInformationSystems/Data-Ingestion-Platform
public void write(List<Row> rows, StructType schema, String tableName) {
    if (CollectionUtils.isNotEmpty(rows))
        sqlContext.createDataFrame(rows, schema).write().mode(SaveMode.Overwrite)
                .jdbc(props.getProperty("url"), tableName, props);
}
}
Code example source: phuonglh/vn.vitk
switch (outputFormat) {
    case JSON:
        output.write().json(outputFileName);
        break;
    case PARQUET:
        output.write().parquet(outputFileName);
        break;
    case TEXT:
        // assumed completion; the original snippet is truncated at this case
        output.write().text(outputFileName);
        break;
}
Code example source: amidst/toolbox
public static void writeDataToFolder(DataSpark data, String path, SQLContext sqlContext, String formatFile) throws Exception {
    data.getDataFrame(sqlContext).write().mode(SaveMode.Overwrite).format(formatFile).save(path);
}
Code example source: phuonglh/vn.vitk
@Override
public void saveImpl(String path) {
    // save metadata and params
    DefaultParamsWriter.saveMetadata(instance, path, sc(),
            DefaultParamsWriter.saveMetadata$default$4(),
            DefaultParamsWriter.saveMetadata$default$5());
    // save model data: markovOrder, numLabels, weights
    Data data = new Data();
    data.setMarkovOrder(contextExtractor.getMarkovOrder().ordinal() + 1);
    data.setWeights(weights);
    data.setTagDictionary(tagDictionary);
    List<Data> list = new LinkedList<Data>();
    list.add(data);
    String dataPath = new Path(path, "data").toString();
    sqlContext().createDataFrame(list, Data.class).write().parquet(dataPath);
    // save pipeline model
    try {
        String pipelinePath = new Path(path, "pipelineModel").toString();
        pipelineModel.write().overwrite().save(pipelinePath);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
}
Code example source: KeithSSmith/spark-compaction
public void compact(String inputPath, String outputPath) throws IOException {
    this.setCompressionAndSerializationOptions(inputPath, outputPath);
    this.outputCompressionProperties(this.outputCompression);
    // Define a Spark context with a generic Spark configuration.
    SparkConf sparkConf = new SparkConf().setAppName("Spark Compaction");
    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    if (this.outputSerialization.equals(TEXT)) {
        JavaRDD<String> textFile = sc.textFile(this.concatInputPath(inputPath));
        textFile.coalesce(this.splitSize).saveAsTextFile(outputPath);
    } else if (this.outputSerialization.equals(PARQUET)) {
        SQLContext sqlContext = new SQLContext(sc);
        DataFrame parquetFile = sqlContext.read().parquet(this.concatInputPath(inputPath));
        parquetFile.coalesce(this.splitSize).write().parquet(outputPath);
    } else if (this.outputSerialization.equals(AVRO)) {
        // For this to work the files must end in .avro.
        // Also, when compression is used, the compression codec extension is not added to the file name.
        SQLContext sqlContext = new SQLContext(sc);
        DataFrame avroFile = sqlContext.read().format("com.databricks.spark.avro").load(this.concatInputPath(inputPath));
        avroFile.coalesce(this.splitSize).write().format("com.databricks.spark.avro").save(outputPath);
    } else {
        System.out.println("Did not match any serialization type: text, parquet, or avro. Received: " +
                this.outputSerialization);
    }
}
Code example source: KeithSSmith/spark-compaction
public void compact(String[] args) throws IOException {
    this.setCompressionAndSerializationOptions(this.parseCli(args));
    this.outputCompressionProperties(this.outputCompression);
    // Define a Spark context with a generic Spark configuration.
    SparkConf sparkConf = new SparkConf().setAppName("Spark Compaction");
    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    if (this.outputSerialization.equals(TEXT)) {
        JavaRDD<String> textFile = sc.textFile(this.concatInputPath(inputPath));
        textFile.coalesce(this.splitSize).saveAsTextFile(outputPath);
    } else if (this.outputSerialization.equals(PARQUET)) {
        SQLContext sqlContext = new SQLContext(sc);
        DataFrame parquetFile = sqlContext.read().parquet(this.concatInputPath(inputPath));
        parquetFile.coalesce(this.splitSize).write().parquet(outputPath);
    } else if (this.outputSerialization.equals(AVRO)) {
        // For this to work the files must end in .avro.
        SQLContext sqlContext = new SQLContext(sc);
        DataFrame avroFile = sqlContext.read().format("com.databricks.spark.avro").load(this.concatInputPath(inputPath));
        avroFile.coalesce(this.splitSize).write().format("com.databricks.spark.avro").save(outputPath);
    } else {
        System.out.println("Did not match any serialization type: text, parquet, or avro. Received: " +
                this.outputSerialization);
    }
}
Code example source: sectong/SparkToParquet
df.write().partitionBy("ipAddress", "method", "responseCode")
        .mode(SaveMode.Append)
        .parquet(Flags.getInstance().getParquetFile());
Code example source: phuonglh/vn.vitk
    df.select("dependency").write().text(outputFileName);
else
    df.repartition(1).write().json(outputFileName);