Rust:从 AnyValue 向量中获取原始数据类型的值

3pvhb19x  于 2023-02-23  发布在  其他
关注(0)|答案(2)|浏览(237)

我可以在Rust Polars中将许多数据类型转换为AnyValue枚举。但是我如何将它们转换回原始的数据类型呢?

use polars::prelude::*;

/// Builds a three-column DataFrame, takes row 1 as a vector of `AnyValue`s,
/// and tries to turn each `AnyValue` back into a plain Rust value.
///
/// NOTE: the two `.into()` conversions at the end are the subject of the
/// question; the comment above each one records the reported compiler error.
fn main() {
    let df = df!(  "Fruit" => &["Apple", "Apple", "Pear"],
                   "Boolean" => &[true,false,true],
                   "Float64" => &[1.1,321.45,101.445])
    .unwrap();

    // Get row 1 of the DataFrame as a vector of AnyValues.
    let vec_anyvalue = df.get(1).unwrap();

    // Trying to get the actual values:

    // Getting the fruit as a String kind of works (but the text includes quotation marks).
    let fruit = vec_anyvalue.get(0).unwrap().to_string();

    // Getting the bool or the float value does not work:

    // ERROR:  the trait `From<&AnyValue<'_>>` is not implemented for `bool`
    let boolvalue: bool = vec_anyvalue.get(1).unwrap().into();
    // ERROR:  the trait `From<AnyValue<'_>>` is not implemented for `f64`
    let floatvalue: f64 = vec_anyvalue.get(2).unwrap().into();
}
dzjeubhm

dzjeubhm1#

我认为您必须自己编写转换器:

/// Returns the `bool` carried by an `AnyValue::Boolean`.
///
/// # Panics
///
/// Panics with "not a boolean" when `v` is any other variant.
fn to_bool<'a>(v: &AnyValue<'a>) -> bool {
    match v {
        AnyValue::Boolean(flag) => *flag,
        _ => panic!("not a boolean"),
    }
}

(或者编写返回 Option/Result 的变体)

46scxncf

46scxncf2#

我可以使用 Series 或 AnyValue 来获取 DataFrame 中的值。

// Rust version 1.67.1 ; polars version 0.27.2
use polars::prelude::*;
use std::error::Error;

/// Demonstrates three ways of reading typed values back out of a polars
/// `DataFrame`: extracting whole columns into `Vec`s, taking one row as a
/// `Vec<AnyValue>` and calling `try_extract`, and fetching a single
/// `AnyValue` from one column.
///
/// # Errors
///
/// Returns an error when any polars call or helper function fails, or when
/// the chosen row/column index is out of bounds (reported as an error rather
/// than a panic).
fn main() -> Result<(), Box<dyn Error>> {
    // Three Series of different lengths; each becomes one cell of the list column.
    let list_a = Series::new("a", &[1.2, 2.2, 3.3]);
    let list_b = Series::new("b", &[7.6, 1.2]);
    let list_c = Series::new("c", &[4.4, -5.07, 99.3, 55.2]);
    // Construct a new list Series from a slice of Series.
    let list = Series::new("ListFloat64", &[list_a, list_b, list_c]);

    // Construct the scalar columns.
    let fruit = Series::new("Fruit", ["Apple", "Apple", "Pear"]);
    let boolean = Series::new("Boolean", [true, false, true]);
    let float64 = Series::new("Float64", [1.1, 321.45, 101.445]);

    let dataframe = DataFrame::new(vec![fruit, boolean, float64, list])?;

    println!("dataframe:\n{dataframe}\n");

    // Approach 1: get the DataFrame values through Series.

    let column_string: &Series = dataframe.column("Fruit")?;
    let column_boolean: &Series = dataframe.column("Boolean")?;
    let column_float64: &Series = dataframe.column("Float64")?;
    let column_listfloat64: &Series = dataframe.column("ListFloat64")?;

    let vec_boolean: Vec<bool> = get_vec_boolean(column_boolean)?;
    let vec_float64: Vec<f64> = get_vec_float64(column_float64)?;

    for row in 0..dataframe.height() {
        println!("Show data from row {row}:");

        let value_string: String = column_string.get(row)?.to_string();
        println!("\tvalue_string: {value_string}");

        let value_bool: bool = vec_boolean[row];
        println!("\tvalue_bool: {value_bool}");

        let value_f64: f64 = vec_float64[row];
        println!("\tvalue_f64: {value_f64}");

        let vec_list_f64: Vec<f64> = get_vec_from_list(column_listfloat64, row)?;
        println!("\tvec_list_f64: {vec_list_f64:?}\n");
    }

    // Approach 2: using Vec<AnyValue>, row = 1, column = 2.
    let row = 1;
    let column = 2;

    // Both lookups return `Option`; turn `None` into an error instead of
    // panicking, since this function already returns `Result`.
    let vec_anyvalue: Vec<AnyValue> = dataframe.get(row).ok_or("row index out of bounds")?;
    let floatvalue: f64 = vec_anyvalue
        .get(column)
        .ok_or("column index out of bounds")?
        .try_extract::<f64>()?;
    println!("floatvalue: {floatvalue}");

    // Approach 3: using a single AnyValue, row = 1.
    // `Series::get` reports a bad index as an error, unlike `iter().nth(row).unwrap()`.
    let anyvalue: AnyValue = dataframe.column("Float64")?.get(row)?;
    let value_f64 = anyvalue.try_extract::<f64>()?;
    println!("value_f64: {value_f64}");

    Ok(())
}

例如,get_vec_boolean:

// https://docs.rs/polars/latest/polars/prelude/struct.Series.html
fn get_vec_boolean (column: &Series) -> Result<Vec<bool>, Box<dyn Error>>  {
    let vec_boolean = column
        .bool()?
        .into_iter()
        .map(|opt_bool| opt_bool.unwrap()) // option, because series can be null
        .collect::<Vec<bool>>();
    Ok(vec_boolean)
}

例如,get_vec_float64:

fn get_vec_float64 (column: &Series) -> Result<Vec<f64>, Box<dyn Error>>  {
    let vec_float64 = column
        .f64()?
        .into_iter()
        .map(|opt_f64value| opt_f64value.unwrap())
        .collect::<Vec<f64>>();
    Ok(vec_float64)
}

例如,get_vec_from_list(从列表列中取出向量):

/// Returns the elements of the list cell at `row` of a list column as a
/// `Vec<f64>`.
///
/// # Errors
///
/// Propagates any error from `explode()` or from `get_vec_float64`.
fn get_vec_from_list (column: &Series, row: usize) -> Result<Vec<f64>, Box<dyn Error>>  {
    // Take a length-1 slice starting at `row`, then explode that single list
    // cell into a flat Series of its elements.
    let exploded: Series = column.slice(row as i64, 1).explode()?;
    get_vec_float64(&exploded)
}

最终结果:

dataframe:
shape: (3, 4)
┌───────┬─────────┬─────────┬────────────────────────┐
│ Fruit ┆ Boolean ┆ Float64 ┆ ListFloat64            │
│ ---   ┆ ---     ┆ ---     ┆ ---                    │
│ str   ┆ bool    ┆ f64     ┆ list[f64]              │
╞═══════╪═════════╪═════════╪════════════════════════╡
│ Apple ┆ true    ┆ 1.1     ┆ [1.2, 2.2, 3.3]        │
│ Apple ┆ false   ┆ 321.45  ┆ [7.6, 1.2]             │
│ Pear  ┆ true    ┆ 101.445 ┆ [4.4, -5.07, ... 55.2] │
└───────┴─────────┴─────────┴────────────────────────┘

Show data from row 0:
    value_string: "Apple"
    value_bool: true
    value_f64: 1.1
    vec_list_f64: [1.2, 2.2, 3.3]

Show data from row 1:
    value_string: "Apple"
    value_bool: false
    value_f64: 321.45
    vec_list_f64: [7.6, 1.2]

Show data from row 2:
    value_string: "Pear"
    value_bool: true
    value_f64: 101.445
    vec_list_f64: [4.4, -5.07, 99.3, 55.2]

floatvalue: 321.45
value_f64: 321.45

相关问题