在C中阅读CSV文件并处理数据中的逗号

0aydgbwb  于 2022-12-06  发布在  其他
关注(0)|答案(2)|浏览(232)

我用C写了一段代码来读取一个CSV文件。该文件包含游戏数据,我需要读取它,按得分排序,并打印评分最高的前10款游戏。代码如下:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define tablesize 18626

/* One game record; every field is kept as a NUL-terminated string. */
typedef struct
{
    char title[200];       /* game title */
    char platform[20];     /* platform name */
    char Score[20];        /* score, stored as text */
    char release_year[20]; /* release year, stored as text */
} dict;

/*
 * Print the first 100 records of `values`, one per line, to stdout.
 * The count is fixed, so the caller must pass an array holding at least
 * 100 records.
 */
void printValues(dict *values)
{
    for (int idx = 0; idx != 100; ++idx)
    {
        const dict *rec = &values[idx];

        printf("title->%s,platform->%s,Score->%s,release->%s\n",
               rec->title,
               rec->platform,
               rec->Score,
               rec->release_year);
    }
}

void sort(dict *values)
{
    for (int i = 0; i < tablesize; i++)
    {
        for (int j = i + 1; j < tablesize; j++)
        {
            int a = *values[i].Score - '0';
            int b = *values[j].Score - '0';
            // printf("%d %d\n",values[i].Score,values[j].Score);
            if (a < b)
            {
                dict temp = values[i];
                values[i] = values[j];
                values[j] = temp;
            }
        }
    }
}

int main()
{
    FILE *fp = fopen("t4_ign.csv", "r");
    if (!fp)
    {
        printf("Error");
        return 0;
    }
    char buff[1024];
    int row = 0, column = 0;
    int count = 0;
    dict *values = NULL;
    int i = 0;
    while (fgets(buff, 1024, fp))
    {
        column = 0;
        row++;
        count++;
        values = realloc(values, sizeof(dict) * count);
        if (NULL == values)
        {
            perror("realloc");
            break;
        }
        if (row == 1)
        {
            continue;
        }
        char *field = strtok(buff, ",");
        while (field)
        {
            if (column == 0)
            {
                strcpy(values[i].title, field);
            }
            if (column == 1)
            {
                strcpy(values[i].platform, field);
            }
            if (column == 2)
            {
                strcpy(values[i].Score, field);
            }
            if (column == 3)
            {
                strcpy(values[i].release_year, field);
            }
            field = strtok(NULL, ",");
            column++;
        }
        i++;
    }
    fclose(fp);
    printf("File loaded!\n", fp);
    sort(values);
    printValues(values);
    free(values);
    return 0;
}

我所面临的问题是CSV文件的标题字段中有逗号,因此它将逗号分隔的数据区分为不同的列,这在加载结构中的数据时会产生错误。
下面是输入文件的两个示例行。当标题包含逗号时使用引号。

"The Chronicles of Narnia: The Lion, The Witch and The Wardrobe",PlayStation 2,8,2005  
The Chronicles of Narnia: Prince Caspian,Wireless,5,2008

有什么建议吗?先谢谢了。

j2cgzkjk

j2cgzkjk1#

由于title字段包含逗号时会用引号括起来,我建议您先检查该行是否以"开头;如果是,就在提取第一个字段时把"用作分隔符。

char *field;
if(buff[0] == '"') {
    field = strtok(buff, "\"");
}
else {
    field = strtok(buff, ",");
}

这样第一次调用之后,逗号会成为剩余字符串的第一个字符,但下一次strtok调用会把它跳过,因为strtok不会返回“空”字段。

o8x7eapl

o8x7eapl2#

函数strtok不适合您的需要,因为它将引号视为与其他字符一样的字符。因此,当strtok看到逗号时,它不会关心逗号是否在引号内。
此外,正如其他人在评论部分指出的,strtok的另一个问题是它跳过空字段。
因此,我不建议使用strtok来完成您想要的任务。
为了解决你的问题,我建议你自己写一个函数,它的功能和strtok、strsep非常相似,但是如果第一个非空白字符是引号,它会把下一个引号当作分隔符,而不是下一个逗号。在下面的代码中,我把这个函数命名为my_strsep。
以下是一个示例:

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

#define NUM_LINES 2

//Splits the next CSV field off the string that *stringp points to.
//
//This function is equivalent to the POSIX function "strsep", except that
//it always uses "," as a delimiter and skips leading whitespace. If the
//first non-whitespace character is a quotation mark, it skips that
//quotation mark and uses the next quotation mark as the delimiter, also
//consuming the comma that follows it.
//
//The string is modified in place: the delimiter that ends the field is
//overwritten with a null character.
//
//Parameters:
//  stringp - address of the read position; updated to point past the
//            consumed field, or set to NULL when the field was the last
//            one in the string
//
//Returns a pointer to the null-terminated field, or NULL if *stringp was
//already NULL.
char *my_strsep( char **restrict stringp )
{
    char *p = *stringp;
    char *start;
    char delimiter = ',';

    //do nothing if *stringp is NULL
    if ( p == NULL )
        return NULL;

    //skip all whitespace characters
    while ( isspace( (unsigned char)*p ) )
        p++;

    //determine whether this field uses quotation marks
    if ( *p == '"' )
    {
        //set delimiter to quotation mark instead of comma
        delimiter = '\"';

        //skip the first quotation mark
        p++;
    }

    //remember the start of the field
    start = p;

    while ( *p != delimiter )
    {
        if ( *p == '\0' )
        {
            if ( delimiter == '\"' )
            {
                fprintf( stderr,
                    "Warning: Encountered end of string before the "
                    "second quotation mark!\n"
                );
            }

            //pass information back to calling function
            *stringp = NULL;
            return start;
        }

        p++;
    }

    //overwrite the delimiter with a null character
    *p = '\0';

    //go past the delimiter
    p++;

    //skip the comma too, if quotation marks are being used
    if ( delimiter == '\"' )
    {
        //skip all whitespace characters
        while ( isspace( (unsigned char)*p ) )
            p++;

        if ( *p == ',' )
        {
            //skip the comma
            p++;
        }
        else if ( *p == '\0' )
        {
            //the quoted field was the last one: report end of input
            //instead of letting the next call return a spurious empty
            //field
            p = NULL;
        }
    }

    //pass information back to calling function
    *stringp = p;
    return start;
}

//Demo driver: splits each sample line into fields with my_strsep and
//prints every field on its own line.
int main( void )
{
    char lines[NUM_LINES][200] = {
        "\"The Chronicles of Narnia: The Lion, The Witch and The Wardrobe\",PlayStation 2,8,2005",
        "The Chronicles of Narnia: Prince Caspian,Wireless,5,2008"
    };

    for ( int line_no = 0; line_no < NUM_LINES; line_no++ )
    {
        char *cursor = lines[line_no];
        char *field;

        printf( "Processing line #%d:\n", line_no + 1 );

        //my_strsep returns NULL once the line has been fully consumed
        for ( field = my_strsep( &cursor ); field != NULL; field = my_strsep( &cursor ) )
        {
            printf( "Found field: %s\n", field );
        }

        printf( "\n" );
    }
}

此程序具有以下输出:

Processing line #1:
Found field: The Chronicles of Narnia: The Lion, The Witch and The Wardrobe
Found field: PlayStation 2
Found field: 8
Found field: 2005

Processing line #2:
Found field: The Chronicles of Narnia: Prince Caspian
Found field: Wireless
Found field: 5
Found field: 2008

如您所见,函数my_strsep可以处理带引号和不带引号的字段。

相关问题