C语言 如何使用链表统计给定文本中每个单词的出现次数

0md85ypi  于 2023-04-19  发布在  其他
关注(0)|答案(2)|浏览(98)

我们有一个写有单词的文本文件,需要打印出每个单词出现的频率。比较不区分大小写,也就是说:tall、TALL、tAlL 被视为同一个单词。
我已经使用基本的for循环和结构完成了这个练习,但现在我必须使用链表实现它。
我的基本思路与之前用for循环时的做法大致相同:使用一个char buffer[N](N预先定义为100),逐个单词地扫描文件;对每个单词,先检查它是否已经存在于链表中。如果已存在,就中断查找循环,不添加新节点,只把该节点的频率加一;如果没有找到,就添加一个频率等于1的新节点。
至少这是我试图实现的。
这是我到目前为止所做的

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 100
/* One entry of the singly linked word list. */
struct node
{
    char *str;          /* the word text (allocated by createlist) */
    int instance;       /* occurrence counter for this word */
    struct node *next;  /* following entry, or NULL at the end of the list */
};
typedef struct node node;
/*
 * Reads whitespace-separated words with fscanf and builds a singly linked
 * list that is meant to hold one node per distinct word (compared with
 * strcasecmp, i.e. ignoring case) together with its count in `instance`.
 *
 * file: the incoming value is never used; it is overwritten by
 *       fopen("file","r") before anything else happens.
 *
 * Returns the head of the list, or NULL when no word was read.
 * Calls exit(1) when the file cannot be opened.
 *
 * Known defects (left untouched: this listing is the code under discussion):
 *   - "%s" has no field width, so a long word can overflow `buffer`;
 *   - both allocation checks run after the pointers were already used;
 *   - the search loop stops at the last node without comparing it;
 *   - a node is allocated for every word, so duplicates leak memory;
 *   - the newest node's `instance` is never assigned;
 *   - the stream is never closed.
 */
node *createlist(FILE *file)
{

    node *tmp=NULL;
    node *head=NULL;
    node* p=NULL;
    char buffer[N+3];
    int found=0;    // only reset inside the search loop below

    // The caller's argument is discarded here; the file name is fixed.
    file=fopen("file","r");
    if(file==NULL)
    {
        printf("Error opening file:");
        exit(1);
    }
    while(fscanf(file,"%s",buffer)!=EOF)
    {
        // A node is allocated before knowing whether the word is new.
        tmp=malloc(sizeof(node));
        tmp->str=malloc((strlen(buffer)+1)*sizeof(char));
        tmp->next=NULL;
        strcpy(tmp->str,buffer);
        // Too late: tmp was already dereferenced above.
        if(tmp==NULL)
        {
            printf("Allocation error");
            exit(1);
        }
        // Too late: strcpy already wrote through tmp->str.
        if(tmp->str == NULL)
        {
            printf("Allocation error");
            exit(2);
        }
        if(head == NULL)
        {
            // First word: tmp->instance is left unassigned here.
            head=tmp;
        }
        else
        {
            p=head;
            // Stops when p is the last node, so that node is never compared.
            while(p->next != NULL)
            {
                if(strcasecmp(p->str,buffer)==0)
                {
                    found=1;
                    break;
                }
                else
                {
                    found=0;
                    p=p->next;

                }
            }
            if(found==1)
            {
                // Duplicate: tmp and tmp->str are not freed on this path.
                p->instance++;
            }
            else if(found==0)
            {
                // p is the current last node: its count is (re)set to 1,
                // while the appended node's own count stays unassigned.
                p->instance=1;
                p->next=tmp;  // author's note: p->next=tmp was moved
                              // inside this else-if branch to fix an
                              // earlier mistake

            }

 

        }
    }

    // The stream opened above is not closed before returning.
    return head;
}

/* Print every list entry as "word->count", one per line, starting at head. */
void printfile(node *head)
{
    node *cur;

    for(cur=head; cur!=NULL; cur=cur->next)
    {
        printf("%s->%d\n",cur->str,cur->instance);
    }
}

/* Builds the word list with createlist and prints it with printfile. */
int main()
{
    FILE *file;     // never assigned before being passed below
    node *head=NULL;

    // Passing the uninitialized pointer is undefined behavior, even though
    // createlist overwrites its parameter with its own fopen result.
    head=createlist(file);
    printfile(head);

    return 0;
}

问题是它仍然存储一些已经使用过的单词。
例如,文本文件为

Mi illumino di immenso
Illumino di immenso
Di immenso
IMMENSO
我的输出是
Mi->1
illumino->2
di->3
immenso->3
immenso->13435072

当它应该是

immenso 4
di 3
illumino 2
Mi 1

题目要求按频率降序打印这些单词,但目前我只想先把它们正确地打印出来。
你知道我能做些什么来解决这个问题吗?

pgky5nke

pgky5nke1#

有几个问题...
1. 在main中,你从未给file赋值就把它传给了createlist,这是UB(未定义行为)。你可能想根据argv[1]来设置它。
2. createlist接受一个FILE *参数,但随后又执行fopen("file","r"),因此打开的始终是一个固定名称的文件。
3. 你可能希望把createlist的参数改为const char *file。
4. if (tmp == NULL)和if (tmp->str == NULL)这两个检查出现得太晚。
5. 因为循环条件是while (p->next != NULL),所以你从未比较/检查链表的*最后一个*元素。
6. 在*判断*是否有重复之前,你就已经分配[并填充]了tmp,因此*如果*是重复单词,就会泄漏内存。
7. 最好只在单词不是重复项时才分配tmp。
8. 在函数末尾,你需要执行fclose(file);

下面是你的原始代码,其中用注释标出了一些bug:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define N 100
/* Entry of the singly linked word list: a word and its counter. */
typedef struct node node;
struct node {
    char *str;                  /* word text (allocated by createlist) */
    int instance;               /* occurrence counter */
    node *next;                 /* next entry, NULL at the end */
};

/*
 * Annotated copy of the asker's list builder: reads whitespace-separated
 * words with fscanf and tries to keep one node per distinct word (compared
 * case-insensitively with strcasecmp), counting repeats in `instance`.
 *
 * file: incoming value is ignored; it is overwritten by fopen("file", "r").
 *
 * Returns the list head, or NULL when no word was read. Calls exit(1) when
 * the file cannot be opened. The NOTE/BUG comments mark the defects that the
 * surrounding answer discusses; the code itself is intentionally unchanged.
 */
node *
createlist(FILE *file)
{
    node *tmp = NULL;
    node *head = NULL;
    node *p = NULL;
    char buffer[N + 3];
    int found = 0;

// NOTE/BUG: the parameter is overwritten, and the file name is hard-coded
    file = fopen("file", "r");
    if (file == NULL) {
        printf("Error opening file:");
        exit(1);
    }
// NOTE/BUG: "%s" has no field width, so a long word can overflow buffer
    while (fscanf(file, "%s", buffer) != EOF) {
        tmp = malloc(sizeof(node));
        tmp->str = malloc((strlen(buffer) + 1) * sizeof(char));
        tmp->next = NULL;
        strcpy(tmp->str, buffer);

// NOTE/BUG: this check occurs too late
        if (tmp == NULL) {
            printf("Allocation error");
            exit(1);
        }

// NOTE/BUG: this check occurs too late
        if (tmp->str == NULL) {
            printf("Allocation error");
            exit(2);
        }

        if (head == NULL) {
// NOTE/BUG: tmp->instance is never assigned for the new node
            head = tmp;
        }
        else {
            p = head;
// NOTE/BUG: this does _not_ compare the _last_ element in the list
            while (p->next != NULL) {
                if (strcasecmp(p->str, buffer) == 0) {
                    found = 1;
                    break;
                }
                else {
                    found = 0;
                    p = p->next;

                }
            }

// NOTE/BUG: since we've already allocated tmp, this _leaks_ that memory
            if (found == 1) {
                p->instance++;
            }
            else if (found == 0) {
// NOTE/BUG: this sets the count of the old last node, not of tmp
                p->instance = 1;
                p->next = tmp;          // author's note: p->next=tmp was
                // moved inside this else-if branch to fix an
                // earlier mistake

            }

        }
    }

// NOTE/BUG: the stream is never closed
    return head;
}

/* Walk the list from head and print each entry as "word->count". */
void
printfile(node *head)
{
    for (node *cur = head; cur != NULL; cur = cur->next)
        printf("%s->%d\n", cur->str, cur->instance);
}

/* Builds the word list with createlist and prints it with printfile. */
int
main(void)
{
    FILE *file;                 // declared but never assigned
    node *head = NULL;

// NOTE/BUG: file is _never_ defined
// (passing the uninitialized pointer is undefined behavior)
    head = createlist(file);
    printfile(head);

    return 0;
}

下面是正确的代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#define N 100
/* A distinct word and how many times it has been seen so far. */
struct node {
    char *str;                  /* heap copy of the word, as first spelled */
    int instance;               /* number of occurrences (starts at 1) */
    struct node *next;          /* next word in the list, NULL at the tail */
};
typedef struct node node;

/* Stringify helpers so the fscanf field width is derived from N. */
#define CREATELIST_STR_(x) #x
#define CREATELIST_STR(x) CREATELIST_STR_(x)

/*
 * Build a word-frequency list from the text file named `file`.
 *
 * Words are whitespace-separated tokens; two tokens are the same word when
 * strcasecmp reports them equal (case-insensitive). Each distinct word gets
 * one node, appended at the tail, so the list is in first-appearance order
 * and keeps the spelling of the first occurrence.
 *
 * At most N characters are read per token, which always fits in `buffer`
 * (N + 3 bytes); a longer run of non-space characters is therefore split
 * into several tokens instead of overflowing the buffer.
 *
 * Returns the head of the list (NULL if the file holds no words). The caller
 * owns the nodes and their `str` buffers.
 * Exits with status 1 if the file cannot be opened or a node cannot be
 * allocated, and with status 2 if a word copy cannot be allocated.
 */
node *
createlist(const char *file)
{
    node *head = NULL;
    char buffer[N + 3];

    FILE *fin = fopen(file, "r");
    if (fin == NULL) {
        printf("Error opening file:");
        exit(1);
    }

    /* Bounded read: the width keeps fscanf from writing past buffer.
       "== 1" stops on EOF and on any other failed conversion. */
    while (fscanf(fin, "%" CREATELIST_STR(N) "s", buffer) == 1) {
        int found = 0;

        node *cur;
        node *prev = NULL;

        /* Search the whole list; if the word is absent, prev ends up as
           the tail, which is where a new node gets linked. */
        for (cur = head;  cur != NULL;  prev = cur, cur = cur->next) {
            found = (strcasecmp(cur->str, buffer) == 0);
            if (found)
                break;
        }

        if (found) {
            cur->instance += 1;
            continue;
        }

        /* Allocate only once the word is known to be new. */
        node *tmp = malloc(sizeof(*tmp));
        if (tmp == NULL) {
            printf("Allocation error");
            exit(1);
        }

        tmp->str = malloc(strlen(buffer) + 1);
        if (tmp->str == NULL) {
            printf("Allocation error");
            exit(2);
        }

        strcpy(tmp->str, buffer);
        tmp->instance = 1;
        tmp->next = NULL;

        if (prev != NULL) {
            /* add to tail of list */
            prev->next = tmp;
        }
        else {
            /* start new list */
            head = tmp;
        }
    }

    fclose(fin);

    return head;
}

/* Print the list in order, one "word->count" line per node. */
void
printfile(node *head)
{
    node *cur = head;

    while (cur) {
        printf("%s->%d\n", cur->str, cur->instance);
        cur = cur->next;
    }
}

/*
 * Entry point: expects exactly one argument, the path of the text file.
 * Prints a message and exits with status 3 otherwise; on success builds the
 * word list from argv[1] and prints it.
 */
int
main(int argc,char **argv)
{
    if (argc != 2) {
        printf("No filename specified\n");
        exit(3);
    }

    node *words = createlist(argv[1]);

    printfile(words);

    return 0;
}

下面是程序输出:

Mi->1
illumino->2
di->3
immenso->4
pn9klfpd

pn9klfpd2#

我已经简化了代码,并添加了错误检查。我还按照需要的相同方向构建了链表,这使得工作更简单。在任何时候都不需要创建head的特殊情况。请注意,只有当它不存在时才创建新节点。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
    
#define N 100
/* List entry: one distinct word plus its occurrence count. */
typedef struct node node;
struct node
{
    char *str;      /* heap copy of the word */
    int instance;   /* how many times the word was read */
    node *next;     /* next entry in the list, or NULL */
};
    
/*
 * Build a word-frequency list from the text file `filename`.
 *
 * Tokens are read with a bounded "%99s", so they always fit in `buffer`.
 * A token that matches an existing node case-insensitively bumps that
 * node's `instance`; otherwise a new node with instance 1 is pushed at the
 * head, so the list ends up in reverse order of first appearance.
 *
 * Returns the head of the list (NULL if no word was read); the caller owns
 * the nodes and their strings. Exits with status 1 if the file cannot be
 * opened or an allocation fails.
 */
node *createlist(char *filename)                // pass file name instead
{
    node *head = NULL;
    char buffer[N+3];

    FILE *file = fopen(filename, "rt");         // no need to have as argument
    if(file == NULL)
    {
        printf("Error opening file:\n");
        exit(1);
    }
    while(fscanf(file, "%99s", buffer) == 1)    // but better not to hard code length
    {
        int found = 0;                          // define variables where needed
        node *p = head;
        while(p)                                // not while(p->next)
        {
            //if(strcasecmp(p->str, buffer) == 0) // gcc
            if(stricmp(p->str, buffer) == 0)    // windows
            {
                // The brace above must be on its own line: when it trailed
                // the "// windows" comment it was commented out, which
                // unbalanced the braces and made the break unconditional.
                found = 1;
                break;
            }
            p = p->next;
        }

        if(found) {
            p->instance++;
        }
        else {
            // Only allocate when the word is not in the list yet.
            node *tmp = malloc(sizeof(node));
            if(tmp == NULL) {
                printf("Node malloc error:\n");
                exit(1);
            }
            tmp->str = malloc(strlen(buffer) + 1);
            if(tmp->str == NULL) {
                printf("String malloc error:\n");
                exit(1);
            }
            strcpy(tmp->str, buffer);
            tmp->instance = 1;

            tmp->next = head;                   // add to head of list
            head = tmp;
        }
    }
    fclose(file);
    return head;
}
    
/* Print each node from head onward as "word->count" on its own line. */
void printfile(node *head)
{
    for(node *cur = head; cur != NULL; cur = cur->next)
    {
        printf("%s->%d\n", cur->str, cur->instance);
    }
}
    
/* Build the word list from "test.txt" and print it. */
int main(void)
{
    node *words;

    words = createlist("test.txt");
    printfile(words);

    return 0;
}

程序输出:

immenso->4
di->3
illumino->2
Mi->1

相关问题