从csv文件中读取数据并放入2D浮点向量,使csv文件的每一列成为2D向量的一行

rslzwgfq  于 2023-05-11  发布在  其他
关注(0)|答案(2)|浏览(101)

我被一个非常复杂的问题难住了。我目前正在为一个课程项目实现并行RMSProp优化算法。我想用真实的数据(https://www.kaggle.com/code/prasadperera/the-boston-housing-dataset)测试我的算法。从csv文件读取数据的示例代码如下:

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Reads "housing.csv", splits every line on commas, and echoes the cells
// to standard output separated by single spaces (one input line per output line).
// Returns 1 if the file cannot be opened, 0 otherwise.
int main() {
    std::ifstream input("housing.csv");

    // Nothing to do if the file is not available.
    if (!input.is_open()) {
        std::cout << "Error opening file\n";
        return 1;
    }

    // table[r] holds the comma-separated cells of input line r.
    std::vector<std::vector<std::string>> table;

    for (std::string text; std::getline(input, text);) {
        std::stringstream fields(text);
        std::vector<std::string> record;

        // Collect the cells of this line in order.
        for (std::string field; std::getline(fields, field, ',');) {
            record.push_back(field);
        }

        table.push_back(record);
    }

    // Dump the table, one record per output line.
    for (std::size_t r = 0; r < table.size(); ++r) {
        for (std::size_t c = 0; c < table[r].size(); ++c) {
            std::cout << table[r][c] << " ";
        }
        std::cout << std::endl;
    }

    input.close();
    return 0;
}

我想把这些值放在一个2D浮点向量中,使csv文件的每一列都成为2D向量的一行,而csv文件的最后一列则放入另一个1D向量。说来惭愧,我尝试了各种办法,但始终无法让它正常工作,因为我是编程新手。如果有人能在这方面帮助我或提出一些建议,那将是非常大的帮助。

  • 编辑 *
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>

using namespace std;

// Reads "housing.csv", splits each line on commas and stores the result
// transposed: data[c] collects the cells of column c across all lines, while
// the last cell of every line is stored separately in `somewhere`.
// Finally prints every collected column on its own output line.
// Returns 1 if the file cannot be opened, 0 otherwise.
int main() {
    // Open the CSV file for reading
    std::ifstream infile("housing.csv");
    // Check if file is open
    if (!infile.is_open()) {
        std::cout << "Error opening file\n";
        return 1;
    }

    std::string line;
    std::vector<std::vector<std::string>> data; // data[c] = all cells of column c
    std::vector<std::string> somewhere;         // last cell of every line

    // Read each line of the CSV file
    while (std::getline(infile, line)) {
        std::stringstream ss(line);
        std::vector<std::string> work;

        std::string cell;
        // Read each cell of the line and add to working vector
        while (std::getline(ss, cell, ',')) {
            work.push_back(cell);
        }

        // An empty line produces no cells; calling back()/pop_back() on an
        // empty vector is undefined behaviour, so skip such lines.
        if (work.empty()) {
            continue;
        }

        // last item goes to somewhere
        somewhere.push_back(work.back());
        // remove last item from working vector
        work.pop_back();

        // Make sure there is one column vector per remaining cell. This sizes
        // `data` on the first line and also keeps data[i] in range if a later
        // line has more cells than any line seen so far.
        if (data.size() < work.size()) {
            data.resize(work.size());
        }

        // Add working vector to data (cell i goes to column i)
        for (std::size_t i = 0; i < work.size(); ++i) {
            data[i].push_back(work[i]);
        }
    }

    // Print one collected column per output line
    for (const auto& column : data) {
        for (const auto& value : column) {
            std::cout << value << " ";
        }
        std::cout << '\n';
    }

    return 0;
}

结果:

Job ID is 45232337
Running on host r05r08n02
Working directory is /panfs/pfs.local/work/i2s/Adam
The following processors are allocated to this job:
r05r08n02
Start: 2023-05-10_02:03:37
Stop: 2023-05-10_02:03:40

housing.csv中的数据如下所示:

0.00632  18.00   2.310  0  0.5380  6.5750  65.20  4.0900   1  296.0  15.30 396.90   4.98  24.00
 0.02731   0.00   7.070  0  0.4690  6.4210  78.90  4.9671   2  242.0  17.80 396.90   9.14  21.60
 0.02729   0.00   7.070  0  0.4690  7.1850  61.10  4.9671   2  242.0  17.80 392.83   4.03  34.70
 0.03237   0.00   2.180  0  0.4580  6.9980  45.80  6.0622   3  222.0  18.70 394.63   2.94  33.40
 0.06905   0.00   2.180  0  0.4580  7.1470  54.20  6.0622   3  222.0  18.70 396.90   5.33  36.20
 0.02985   0.00   2.180  0  0.4580  6.4300  58.70  6.0622   3  222.0  18.70 394.12   5.21  28.70
 0.08829  12.50   7.870  0  0.5240  6.0120  66.60  5.5605   5  311.0  15.20 395.60  12.43  22.90
 0.14455  12.50   7.870  0  0.5240  6.1720  96.10  5.9505   5  311.0  15.20 396.90  19.15  27.10

编辑

#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>

// Reads "housing.csv", splits every line on tab characters and prints the
// cells back to standard output, each followed by a tab.
// Returns 1 if the file cannot be opened, 0 otherwise.
int main() {
    std::ifstream file("housing.csv");

    // Without this check a missing file silently produces no output at all.
    if (!file.is_open()) {
        std::cout << "Error opening file\n";
        return 1;
    }

    std::vector<std::vector<std::string>> data; // data[r] = cells of line r
    std::string line;

    while (std::getline(file, line)) {
        std::vector<std::string> row;
        std::stringstream ss(line);
        std::string cell;

        // A line that contains no tab ends up as a single cell.
        while (std::getline(ss, cell, '\t')) {
            row.push_back(cell);
        }

        data.push_back(row);
    }

    // Print the data
    for (const auto& row : data) {
        for (const auto& cell : row) {
            std::cout << cell << '\t';
        }
        std::cout << '\n';
    }

    return 0;
}
Start: 2023-05-10_02:51:22
 0.00632  18.00   2.310  0  0.5380  6.5750  65.20  4.0900   1  296.0  15.30 396.90   4.98  24.00    
 0.02731   0.00   7.070  0  0.4690  6.4210  78.90  4.9671   2  242.0  17.80 396.90   9.14  21.60    
 0.02729   0.00   7.070  0  0.4690  7.1850  61.10  4.9671   2  242.0  17.80 392.83   4.03  34.70    
 0.03237   0.00   2.180  0  0.4580  6.9980  45.80  6.0622   3  222.0  18.70 394.63   2.94  33.40    
 0.06905   0.00   2.180  0  0.4580  7.1470  54.20  6.0622   3  222.0  18.70 396.90   5.33  36.20    
 0.02985   0.00   2.180  0  0.4580  6.4300  58.70  6.0622   3  222.0  18.70 394.12   5.21  28.70    
 0.08829  12.50   7.870  0  0.5240  6.0120  66.60  5.5605   5  311.0  15.20 395.60  12.43  22.90    
 0.14455  12.50   7.870  0  0.5240  6.1720  96.10  5.9505   5  311.0  15.20 396.90  19.15  27.10    
 0.21124  12.50   7.870  0  0.5240  5.6310 100.00  6.0821   5  311.0  15.20 386.63  29.93  16.50    
 0.17004  12.50   7.870  0  0.5240  6.0040  85.90  6.5921   5  311.0  15.20 386.71  17.10  18.90    
 0.22489  12.50   7.870  0  0.5240  6.3770  94.30  6.3467   5  311.0  15.20 392.52  20.45  15.00    
 0.11747  12.50   7.870  0  0.5240  6.0090  82.90  6.2267   5  311.0  15.20 396.90  13.27  18.90    
 0.09378  12.50   7.870  0  0.5240  5.8890  39.00  5.4509   5  311.0  15.20 390.50  15.71  21.70    
 0.6
flmtquvp

flmtquvp1#

就像这样

// Read data line by line and store it transposed: data[c] collects column c,
// and the last cell of every line goes to `somewhere`.
// NOTE(review): this fragment relies on `infile`, `line`, `data` and
// `somewhere` being declared by the surrounding code (not shown here).
while (std::getline(infile, line)) {
    std::stringstream ss(line);
    std::vector<std::string> work;

    std::string cell;
    // Read each cell of the line and add to working vector
    while (std::getline(ss, cell, ',')) {
        work.push_back(cell);
    }

    // An empty line produces no cells; back()/pop_back() on an empty vector
    // is undefined behaviour, so skip such lines.
    if (work.empty())
        continue;

    // last item goes to somewhere
    somewhere.push_back(work.back());
    // remove last item from working vector
    work.pop_back();

    // Size data on the first line, and grow it if a later line has more
    // cells, so that data[i] below is always in range.
    if (data.size() < work.size())
        data.resize(work.size());

    // Add working vector to data (cell i goes to column i)
    for (size_t i = 0; i < work.size(); ++i)
        data[i].push_back(work[i]);
}

这是未经测试的代码。代码中的错误检查也很少,特别是它假设所有行都有相同数量的数据项。

kiayqfof

kiayqfof2#

通常我会对读取同一个文件两次的低效率感到不安。但是,在这种情况下,这样做可能对您有所帮助,因为它允许您首先确定行数和列数,设置一个double类型的数组,然后直接将数据读入其中。
此外,CSV文件往往包含“混合数据”-例如,您的文件在数字数据上方包含文本标题。但是,只要知道标题行数,就可以跳过相应的行数。
所以,如果你准备忍受“读取文件两次/直接读取到双精度”的解决方案,那么下面的内容可能会对你有所帮助。
请注意,有一些限制条件。数值数据数组必须是矩形的,而不是参差不齐的。不能有缺失的数据(如果有,就不可避免地要用字符串来处理)。必须有可识别的分隔符(见下文)。
正如其他人评论的那样,您的文件不是CSV文件(好吧,根据我的定义),因为数据由空格分隔,而不是逗号或类似的东西。
我通过添加逗号更改了数据文件。如果你想读取原始文件,那么你必须根据需要调整读取语句以及获取列数的代码。
数据文件:

A,    CRIM,    ZN,  INDUS,  CHAS,    NOX,     RM,   AGE,     DIS,  RAD,    TAX
0, 0.00632,  18.0,   2.31,     0,  0.538,  6.575,  65.2,  4.0900,    1,  296.0
1, 0.02731,   0.0,   7.07,     0,  0.469,  6.421,  78.9,  4.9671,    2,  242.0
2, 0.02729,   0.0,   7.07,     0,  0.469,  7.185,  61.1,  4.9671,    2,  242.0
3, 0.03237,   0.0,   2.18,     0,  0.458,  6.998,  45.8,  6.0622,    3,  222.0
4, 0.06905,   0.0,   2.18,     0,  0.458,  7.147,  54.2,  6.0622,    3,  222.0

示例代码:

#include <iostream>                         
#include <fstream>
#include <vector>
#include <string>
#include <algorithm>
using namespace std;

/// @brief Reads a rectangular table of numbers from a delimited text file.
///
/// The file is scanned twice: the first pass determines the table size, the
/// second pass extracts the numbers directly into a pre-sized result.
///
/// @param filename    Path of the file to read.
/// @param separator   Character between values on a line. It must not be a
///                    whitespace character, because formatted extraction
///                    skips whitespace before reading the separator.
/// @param headerlines Number of leading lines to skip before the numeric
///                    data starts (negative values are treated as 0).
/// @return One inner vector per data row. The column count is taken from the
///         number of separators on the first line of the file. Returns an
///         empty vector if the file cannot be opened or has no data rows.
///         If a value cannot be parsed, that element and all later ones
///         are left at 0.
std::vector< std::vector<double> > getdata( const std::string &filename, char separator = ',', int headerlines = 0 )
{
   const std::size_t skip = headerlines > 0 ? static_cast<std::size_t>( headerlines ) : 0;

   // *** First pass: quick read to establish number of rows and columns
   std::ifstream in( filename );
   if ( !in ) return {};                                             // unreadable file: nothing to return

   std::string line;
   std::size_t ncols = 0;
   std::size_t nrows = 0;
   for ( std::size_t lineno = 0; std::getline( in, line ); lineno++ )
   {
      // separators on the first line, plus one, gives the number of columns
      if ( lineno == 0 ) ncols = static_cast<std::size_t>( std::count( line.begin(), line.end(), separator ) ) + 1;

      // Only non-blank lines after the header are data rows. The second pass
      // skips blank lines implicitly (extraction skips whitespace), so counting
      // them here would append spurious all-zero rows.
      if ( lineno >= skip && line.find_first_not_of( " \t\r" ) != std::string::npos ) nrows++;
   }
   if ( nrows == 0 ) return {};                                      // empty file or header only

   // *** Second pass: set up a double array and read directly into it (remember the headerlines)
   std::vector< std::vector<double> > result( nrows, std::vector<double>( ncols ) );
   in.clear();                                                       // drop the eof/fail state left by the first pass
   in.seekg( 0 );

   for ( std::size_t i = 0; i < skip; i++ ) std::getline( in, line ); // skip header lines
   for ( auto &row : result )
   {
      in >> row[0];                                                  // first item
      for ( std::size_t j = 1; j < ncols; j++ )
      {
         char dummy;                                                 // receives the separator
         in >> dummy >> row[j];                                      // then separator and item pairs
      }
   }

   return result;
}

//----------------------------------------------------------------------

// Loads "housing.csv" as a comma-separated table with one header line and
// prints it: every value is followed by a tab, every row ends with a newline.
int main()
{
   const auto table = getdata( "housing.csv", ',', 1 );

   for ( std::size_t r = 0; r < table.size(); ++r )
   {
      for ( std::size_t c = 0; c < table[r].size(); ++c ) std::cout << table[r][c] << '\t';
      std::cout << '\n';
   }
}

由于你的数据实际上是int和double的混合(我认为),那么另一种方法是创建一个恰好包含这些字段的结构体,并重载>>运算符,以便从一行中读取该类型的对象。

相关问题