This article collects code examples of the Java method org.apache.spark.sql.DataFrame.collect(), showing how DataFrame.collect() is used in practice. The examples are extracted from selected open-source projects found on platforms such as GitHub, Stack Overflow, and Maven, so they are useful as real-world references. Details of the DataFrame.collect() method:

Package: org.apache.spark.sql
Class: DataFrame
Method: collect
Method description: none provided
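Before the project examples, here is a minimal, self-contained sketch of the call itself. It assumes a Spark 1.x build (where collect() on a DataFrame returns Row[]) and uses people.json as a placeholder input path; it is an illustration, not taken from any of the projects below.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

public class CollectExample {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("CollectExample").setMaster("local[*]");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    SQLContext sqlContext = new SQLContext(jsc);

    // people.json is a placeholder path; any small JSON dataset works.
    DataFrame df = sqlContext.read().json("people.json");

    // collect() materializes the entire result set in driver memory as Row[],
    // so it should only be called on results known to be small.
    Row[] rows = df.collect();
    for (Row row : rows) {
      System.out.println(row);
    }
    jsc.stop();
  }
}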
Code example (origin: databricks/learning-spark)
Row[] result = topTweets.collect();
for (Row row : result) {
  System.out.println(row.get(0));
}

Row[] lengths = tweetLength.collect();
for (Row row : lengths) {
  System.out.println(row.get(0));
}
Code example (origin: org.wso2.carbon.analytics/org.wso2.carbon.analytics.spark.core)
public static List<Record> dataFrameToRecordsList(int tenantId, String tableName,
                                                  DataFrame dataFrame) {
  Row[] rows = dataFrame.collect();
  List<Record> records = new ArrayList<>();
  StructType schema = dataFrame.schema();
  for (Row row : rows) {
    records.add(new Record(tenantId, tableName, convertRowAndSchemaToValuesMap(row, schema)));
  }
  return records;
}
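The helper convertRowAndSchemaToValuesMap is referenced above but not shown. A plausible sketch (a hypothetical reconstruction, not the actual WSO2 implementation) would pair each field name from the schema with the value at the same position in the row:

import java.util.HashMap;
import java.util.Map;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.StructType;

// Hypothetical reconstruction for illustration; the real WSO2 helper may differ.
private static Map<String, Object> convertRowAndSchemaToValuesMap(Row row, StructType schema) {
  Map<String, Object> values = new HashMap<>();
  String[] fieldNames = schema.fieldNames();
  for (int i = 0; i < fieldNames.length; i++) {
    values.put(fieldNames[i], row.get(i));
  }
  return values;
}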
Code example (origin: amidst/toolbox)
private static ArrayList<String> getColumnStates(DataFrame data, String name) {
  ArrayList<String> states = new ArrayList<>();
  final Row[] statesRow = data.select(name).distinct().collect();
  for (Row r : statesRow) {
    states.add(r.getString(0));
  }
  return states;
}
Code example (origin: com.cloudera.livy/livy-test-lib)
@Override
public List<String> call(JobContext jc) throws Exception {
  InputStream source = getClass().getResourceAsStream("/testweet.json");

  // Save the resource as a file in HDFS (or the local tmp dir when using a local filesystem).
  URI input;
  File local = File.createTempFile("tweets", ".json", jc.getLocalTmpDir());
  Files.copy(source, local.toPath(), StandardCopyOption.REPLACE_EXISTING);
  FileSystem fs = FileSystem.get(jc.sc().sc().hadoopConfiguration());
  if ("file".equals(fs.getUri().getScheme())) {
    input = local.toURI();
  } else {
    String uuid = UUID.randomUUID().toString();
    Path target = new Path("/tmp/" + uuid + "-tweets.json");
    fs.copyFromLocalFile(new Path(local.toURI()), target);
    input = target.toUri();
  }

  SQLContext sqlctx = useHiveContext ? jc.hivectx() : jc.sqlctx();
  sqlctx.jsonFile(input.toString()).registerTempTable("tweets");

  List<String> tweetList = new ArrayList<>();
  Row[] result =
      (Row[]) (sqlctx.sql("SELECT text, retweetCount FROM tweets ORDER BY retweetCount LIMIT 10")
          .collect());
  for (Row r : result) {
    tweetList.add(r.toString());
  }
  return tweetList;
}
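Note that SQLContext.jsonFile(...) was deprecated in Spark 1.4 in favor of the DataFrameReader API; on a later 1.x build the same line would typically read:

sqlctx.read().json(input.toString()).registerTempTable("tweets");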
Code example (origin: ddf-project/DDF)
@Override
public SqlTypedResult sqlTyped(String command, Integer maxRows, DataSourceDescriptor dataSource) throws DDFException {
  DataFrame rdd = ((SparkDDFManager) this.getManager()).getHiveContext().sql(command);
  Schema schema = SparkUtils.schemaFromDataFrame(rdd);
  int columnSize = schema.getNumColumns();
  Row[] rddRows = rdd.collect();
  List<List<SqlTypedCell>> sqlTypedResult = new ArrayList<List<SqlTypedCell>>();

  // Scan every cell and add the type information.
  for (int rowIdx = 0; rowIdx < rddRows.length; ++rowIdx) {
    List<SqlTypedCell> row = new ArrayList<SqlTypedCell>();
    for (int colIdx = 0; colIdx < columnSize; ++colIdx) {
      // TODO: Optimize by reducing getType().
      row.add(new SqlTypedCell(schema.getColumn(colIdx).getType(), rddRows[rowIdx].get(colIdx).toString()));
    }
    sqlTypedResult.add(row);
  }
  return new SqlTypedResult(schema, sqlTypedResult);
}
Code example (origin: org.wso2.carbon.analytics/org.wso2.carbon.analytics.spark.core)
private AnalyticsQueryResult toResult(DataFrame dataFrame)
    throws AnalyticsExecutionException {
  int resultsLimit = this.sparkConf.getInt("carbon.spark.results.limit", -1);
  if (resultsLimit != -1) {
    return new AnalyticsQueryResult(dataFrame.schema().fieldNames(),
        convertRowsToObjects(dataFrame.limit(resultsLimit).collect()));
  } else {
    return new AnalyticsQueryResult(dataFrame.schema().fieldNames(),
        convertRowsToObjects(dataFrame.collect()));
  }
}
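This example shows the standard guard around collect(): since collect() pulls every row into driver memory, applying limit(n) first (here controlled by the carbon.spark.results.limit setting) bounds how much data the driver must hold.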
Code example (origin: ddf-project/DDF)
// Two related fragments from the same DDF source file.
// Fragment 1: collect the distinct non-null values of a column.
String sqlCmd = String.format("select distinct(%s) from %s where %s is not null",
    column.getName(), this.getDDF().getTableName(), column.getName());
DataFrame sqlResult = sqlContext.sql(sqlCmd);
Row[] rows = sqlResult.collect();
List<String> values = new ArrayList<>();
for (Row row : rows) {
  values.add(row.getString(0)); // loop body elided in the source; this is the likely intent
}

// Fragment 2: run an aggregate query (sql holds select expressions built earlier)
// and collect its single result row.
sql = String.format("select %s from %s", sql, this.getDDF().getTableName());
DataFrame aggResult = sqlContext.sql(sql);
Row[] aggRows = aggResult.collect();
Row result = aggRows[0];
int i = 0;
Code example (origin: phuonglh/vn.vitk)
DataFrame df0 = (new SQLContext(jsc)).createDataFrame(jrdd, WhitespaceContext.class);
DataFrame df1 = model.transform(df0);
prediction = jsc.broadcast(df1.select("prediction").collect());
if (df1.count() > 0) {
  output = s.map(new WhitespaceClassificationFunction());
}
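In this fragment, collect() materializes the prediction column on the driver so it can be shipped back to the executors as a broadcast variable and consulted inside the subsequent map function.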