目前正在使用Flex扫描定义C子集的语法。
/*definitions*/
%{
#include <stdio.h>
int yylex(void);
%}
/*rules*/
%%
"int" {printf("TYPE_INT %s\n",yytext);}
"float" {printf("TYPE_FLOAT %s\n",yytext);}
"char" {printf("TYPE_CHAR %s\n",yytext);}
"bool" {printf("TYPE_BOOL %s\n",yytext);}
"while" {printf("WHILE %s\n",yytext);}
"do" {printf("DO %s\n",yytext);}
"for" {printf("FOR %s\n",yytext);}
"printf" {printf("PRINTF %s\n",yytext);}
"scanf" {printf("SCANF %s\n",yytext);}
"if" {printf("IF %s\n",yytext);}
"elif" {printf("ELIF %s\n",yytext);}
"else if" {printf("ELSE_IF %s\n",yytext);}
"else" {printf("ELSE %s\n",yytext);}
"true" {printf("TRUE %s\n",yytext);}
"false" {printf("FALSE %s\n",yytext);}
"void" {printf("VOID %s\n",yytext);}
"main" {printf("MAIN %s\n",yytext);}
"return" {printf("RETURN %s\n",yytext);}
[a-zA-Z_][a-zA-Z0-9]* {printf("IDENTIFIER %s\n",yytext);}
[-+]?(([1-9][0-9]*)|0) {printf("INTEGER %s\n",yytext);}
[-+]?[0-9]+\.[0-9]+ {printf("FLOAT %s\n",yytext);}
\"[^\"\n]*\" {printf("STRING %s\n",yytext);}
"," {printf("COMMA %s\n",yytext);}
";" {printf("SEMICOLON %s\n",yytext);}
"{" {printf("LEFT_BRACE %s\n",yytext);}
"}" {printf("RIGHT_BRACE %s\n",yytext);}
"(" {printf("LEFT_PAREN %s\n",yytext);}
")" {printf("RIGHT_PAREN %s\n",yytext);}
"[" {printf("LEFT_BRACKET %s\n",yytext);}
"]" {printf("RIGHT_BRACKET %s\n",yytext);}
"-" {printf("MINUS %s\n",yytext);}
"+" {printf("PLUS %s\n",yytext);}
"*" {printf("MULTIPLY %s\n",yytext);}
"/" {printf("DIVIDE %s\n",yytext);}
"\\" {printf("BACKSLASH %s\n",yytext);}
"%" {printf("MODULUS %s\n",yytext);}
"==" {printf("EQUALS %s\n",yytext);}
"!=" {printf("NOT_EQUALS %s\n",yytext);}
"<" {printf("LESS_THAN %s\n",yytext);}
">" {printf("GREATER_THAN %s\n",yytext);}
"<=" {printf("LESS_THAN_OR_EQUAL %s\n",yytext);}
">=" {printf("GREATER_THAN_OR_EQUAL %s\n",yytext);}
"=" {printf("ASSIGN %s\n",yytext);}
"&&" {printf("LOGICAL_AND %s\n",yytext);}
"||" {printf("LOGICAL_OR %s\n",yytext);}
"!" {printf("LOGICAL_NOT %s\n",yytext);}
[" "|\t|\n|\f|\v] {printf("WHITESPACE\n");}
. {printf("UNRECOGNIZED_CHARACTER %s\n",yytext);}
%%
/*for when we use multipe input files*/
int yywrap(void){
return 1;
}
/*main driver function that takes */
int main(int argc, char *argv[]){
if(argc<2){
printf("Usage: %s <input_file_name>\n",argv[0]);
return 1;
}
FILE *fp = fopen(argv[1], "r");
if(fp == NULL){
printf("Error opening input file.\n");
return 1;
}
yyin = fp;
yylex();
fclose(fp);
return 0;
}
我遇到的问题是某些输入,如90.s3
和232a3
。而不是像上一个语法规则中那样获得'UNRECognizedCHARACTER',flex将它们分开。232a3
变为
INTEGER 232
IDENTIFIER a3
而90.s3
变成
INTEGER 90
UNRECOGNIZED_CHARACTER .
IDENTIFIER s3
我该如何阻止这种情况的发生?
1条答案
按热度按时间hm2xizp91#
我该如何阻止这种情况的发生?
有几种选择。其中,
INTEGER
模式仅识别紧跟文件结尾的整数标记,或者既不是小数点也不是大写字母或小写字母的字符:请注意,这个例子省略了数字模式中的前导
+
和-
。它们将由您现有的+
和-
运算符模式匹配,这正是它们在C中的情况。还要注意,如果像在C中那样支持
.
作为成员选择操作符,则需要做更多的工作--可能不止一个错误模式。