所以我最近开始学习C语言，它开始变得比我想象的要复杂得多。我在解析字符串的函数上遇到了问题。我正在尝试做的是解析输入，例如：50 + 50或50+50，这样它就成为一个数组，其中50、+、50作为数组的不同元素。下面是parseString函数及其辅助函数：

int testnumber = 0;

void test() {
    testnumber++;
    printf("Test %d", testnumber);
    printf("\n");
}

bool isNumber(char *token) {
    int i;
    for (i = 0; token[i] != '\0'; i++) {
        if (!isdigit(token[i]))
            return false;
    }
    return true;
}

bool isOperator(char *token) {
    if (strlen(token) > 1)
        return false;
    switch (token[0]) {
      case '+':
      case '-':
      case '*':
      case '/':
      case '%':
        return true;
      default:
        return false;
    }
}

char **parseString(char *input) {
    test();
    int n_spaces = 0, i, start = 0, end;
    for (i = 0; i < strlen(input); i++) {
        if (input[i] == ' ') {
            n_spaces++;
        }
    }
    char **tokens = malloc(sizeof(char *) * (n_spaces + 1));
    for (i = 0; i < strlen(input); i++) {
        if (input[i] == ' ') {
            end = i;
            tokens = realloc(tokens, sizeof(char*) * (n_spaces + 1));
            tokens[n_spaces] = malloc(end - start + 1);
            strncpy(tokens[n_spaces], input + start, end - start);
            tokens[n_spaces][end - start] = '\0';
            start = i + 1;
            n_spaces++;
        }
    }
    end = strlen(input);
    tokens[n_spaces] = malloc(end - start + 1);
    strncpy(tokens[n_spaces], input + start, end - start);
    tokens[n_spaces][end - start] = '\0';
    n_spaces++;
    int j = 0;
    for (i = 0; i < n_spaces; i++) {
        char *temp = tokens[i];
        if (isNumber(temp)) {
            tokens[j++] = temp;
        } else
        if (isOperator(temp)) {
            tokens[j++] = temp;
        }
    }
    
    test();
    tokens[j] = NULL;
    return tokens;
}

下面也是我用来测试输出的主要函数：

int main() {
    // Declare variable 'size' and initialize it with 100
    size_t size = 100;

    // Allocate memory for a character array and assign it to pointer 'input'
    char *input = malloc(size);

    // Read input from standard input and store it in the character array 'input'
    fgets(input, (int) size, stdin);

    char **tokens = parseString(input);

    test();
    int i;
    for (i = 0; *(tokens + i); i++) {
        printf("%s\n", *(tokens + i));
    }

    test();
    // Free up memory.
    free(input);
    free(tokens);
    return 0;
}

我试着在main函数中添加一些printf，看看为什么它没有给予我期望的输出：

int main() {
    // Declare variable 'size' and initialize it with 100
    size_t size = 100;

    // Allocate memory for a character array and assign it to pointer 'input'
    char *input = malloc(size);

    // Read input from standard input and store it in the character array 'input'
    fgets(input, (int) size, stdin);

    char **tokens = parseString(input);

    printf("%s\n", tokens[0]);
    printf("%s\n", tokens[1]);
    printf("%s\n", tokens[2]);
    test();
    int i;
    /*for (i = 0; *(tokens + i); i++) {
        printf("%s\n", *(tokens + i));
    }*/

    test();
    // Free up memory.
    free(input);
    free(tokens);
    return 0;
}

但它只给出了输出：

Test 1
Test 2
50
+
(null)
Test 3
Test 4
Process finished with exit code 0

当我给予它输入50 + 50时，但当我给它输入50+50时，它给我输出：

Test 1
Test 2
(null)

Process finished with exit code -1073741819 (0xC0000005)

if (isdigit(input[current_index])) { char number_buffer[11] = { 0 }; // Enough for a 32-bit integer // Initialized to zero so there's a null-terminator at the end size_t number_position = 0; // Position in the number-buffer // While there's a digit in the input, and we don't overflow the // number string buffer... while (isdigit(input[current_index]) && number_position < 10) { // Copy the character number_buffer[number_position++] = input[current_index++]; } // TODO: Check for and handle if the number is too long // Now we can convert the number string into an actual value int value = strtol(number_buffer, NULL, 10); // TODO: Store the number somehow // Return to the caller, saying we found a number return NUMBER; }

1条答案

按热度按时间

zed5wv101#

这是关于我通常如何处理标记或词位解析的描述。
我通常做的是在循环中一次查看一两个字符，通常使用位置变量（如数组索引）。
初始化当前位置为零，使输入字符串的第一个字符为“当前字符”，然后进入一个无限循环，在循环中检查它是否是输入的最后一个字符，如果是，则返回一个文件结束或输入结束值。
然后，如果当前字符是空格（使用isspace检查），则转到输入中的下一个字符（将“当前字符”索引增加一），continue执行无限循环。
在此之后，我使用一系列if语句来识别不同的标记/词位。

if (isdigit(input[current_position]))
{
    // TODO: Handle numbers
}

类似地，字符串和标识符等其他常见标记也是如此。
每个这样的检查return与令牌的正确值。
最后，对于“operator”标记，我使用switch语句返回它们各自的值：

switch (input[current_index])
{
case '+':
    return PLUS;
case '-':
    return MINUS;
// Etc...

default:
    // TODO: Error
}

如果此时我们还没有返回一个可识别的标记，那就是一个错误，我将返回一个错误代码或类似的代码。
无限循环并不是必须的，但是我认为它可以使空格的处理变得更容易，而且我认为它可以更容易地处理需要多次读取或获取的缓冲输入（如果我们到达当前输入的末尾，读取新的输入，重置位置/字符索引，然后continue循环）。
现在我们来看看整数等令牌的细节，当当前字符为数字时，我们会一个字符接一个字符地读取，并放入一个新的临时缓冲区，只要它是特定令牌的有效字符。当我们有一个不同的字符时，只需停止读取，将临时缓冲区转换为合适的类型，并返回。
数字的例子可以扩展为如下形式：

上面概述的方法相当通用，但是返回结果更适合解析器，而不是将标记拆分为数组。
但是你仍然可以把它作为一个基础，或者把它作为把输入分解成独立的标记的一部分，而不是像我上面的例子那样返回标记类型，你可以把字符存储到一个缓冲区中，终止它，然后返回它（实际上把缓冲区作为一个参数传递给函数，然后填充它）。
为适应您的用例而进行的修改留作练习。：）

赞(0）回复(0）举报 2023-01-20

在C中解析字符串以拆分整数和运算符

1条答案

相关问题

热门标签

最新问答