詞法分析
前一段時間一直在忙上機的事情,連續2周上機四次,現在才有時間整理
廢話不多說,上思路:
首先,我以下面這張「單詞符號—種別碼」對照表作為分類的標準。
/*
單詞符号 種别碼 單詞符号 種别碼 單詞符号 種别碼
begin 1 if 2 then 3
while 4 do 5 end 6
main 7 int 8 float 9
for 10 else 11 double 12
char 13 break 14 continue 15
標識符 16 整數 17 + 18
- 19 * 20 # 21
% 22 != 23 < 24
<> 25 <= 26 > 27
>= 28 = 29 == 30
; 31 ( 32 ) 33
! 34 / 35 " 36
*/
當然也可以自行擴充,只需要在 switch-case 裡面添加對應的 case 即可;
寫代碼的時候需要注意:要逐一分析每一個類別是否應該進入 main 裡面的分析循環。
代碼如下:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Defines the end-of-keywords marker string. */
/* NOTE(review): identifiers starting with '_' followed by an uppercase letter are reserved for the implementation. */
#define _KEY_WORD_END "waiting for your expanding" /* end-of-keywords marker */
/* Size, in bytes, of the input[] and token[] buffers. */
#define MAX_SIZE 225
/* One lexical token as returned by scanner(). */
typedef struct {
int typenum; /* category code of the token (see the code table in this file) */
char *word; /* token text; scanner() points this at token[] or at a string literal, the struct does not own it */
}WORD;
WORD *scanner(); // lexical scan function: fetches one token
char m_getch();
void getbc();
void concat();
int letter();
int digit();
int reserve();
void retract();
// NOTE(review): no function named dtb is defined in this file; the stub further down is spelled dbt.
char *dtb();
char input[MAX_SIZE]; // input buffer
char token[MAX_SIZE]=""; // buffer holding the token currently being assembled
int p_input; // index into input[] of the next character to read
int p_token; // index into token[] of the next free slot
/*
Category codes
token       code   token    code   token     code
begin       1      if       2      then      3
while       4      do       5      end       6
main        7      int      8      float     9
for         10     else     11     double    12
char        13     break    14     continue  15
identifier  16     integer  17     +         18
-           19     *        20     #         21
%           22     !=       23     <         24
<>          25     <=       26     >         27
>=          28     =        29     ==        30
;           31     (        32     )         33
!           34     /        35     "         36
The program recognizes comments and filters them out.
*/
char ch; // character most recently read
// Keyword table: reserve() returns index + 1 as the category code of a matching entry.
// NOTE(review): "printf" sits at index 15, so it maps to code 16 -- the same code reserve() returns for identifiers.
char * rwtab[] = {"begin","if","then","while","do","end","main","int","float","for","else","double","char","break","continue","printf"} ;
int main(){
int over = 1;
WORD * oneword = new WORD;//已經測試可以使用 new
printf("Enter your words(end with $):\n");
scanf("%[^$]s",input);
p_input = 0;
printf("\nYour words : \n%s\n",input);
while(over < 1000 && over != -1){
oneword = scanner(); //獲得一個單詞
if(oneword->typenum < 1000)
printf("(% d, % s)\n",oneword->typenum,oneword->word);//列印種别碼和自生的值
over = oneword->typenum;
}
printf("\n press $ to exit:");
scanf("%[^$]s",input);
return 0;
}
/*
 * Copies the character at the current input position into the global ch,
 * advances p_input by one, and returns that character.
 */
char m_getch(){
    ch = input[p_input++];
    return ch;
}
void getbc(){ //去掉空白部分
while(ch==' '||ch == 10){
ch = input[p_input];
p_input += 1;
}
}
/*
 * Appends the current character ch to token[] and keeps the buffer
 * NUL-terminated; p_token ends up indexing the terminator.
 */
void concat(){
    token[p_token++] = ch;
    token[p_token] = '\0';
}
/* Returns 1 when ch is an ASCII letter (a-z or A-Z), otherwise 0. */
int letter(){
    const int is_lower = (ch >= 'a' && ch <= 'z');
    const int is_upper = (ch >= 'A' && ch <= 'Z');
    return (is_lower || is_upper) ? 1 : 0;
}
/* Returns 1 when ch is a decimal digit ('0'..'9'), otherwise 0. */
int digit(){
    return (ch < '0' || ch > '9') ? 0 : 1;
}
/*
 * Looks token[] up in the keyword table rwtab.
 *
 * Returns the 1-based position of the matching entry (its category code),
 * or 16 (identifier) when token[] is not in the table.
 *
 * The whole table is searched. The previous loop waited for a
 * _KEY_WORD_END sentinel that rwtab does not contain and only stopped once
 * the index exceeded strlen(rwtab[0]), so entries after "main" were never
 * compared and keywords such as "int" or "for" came back as identifiers.
 */
int reserve(){
    /* rwtab is a real array in this file, so sizeof gives its entry count */
    const int count = (int)(sizeof(rwtab) / sizeof(rwtab[0]));

    for(int i = 0; i < count; i++){
        if(strcmp(rwtab[i], token) == 0){
            return i + 1;
        }
    }
    return 16;
}
/* Steps the input position back by one so the last character is read again. */
void retract(){
    --p_input;
}
/*
 * Placeholder for the number-to-binary conversion; not implemented.
 * Always returns NULL.
 */
char *dbt(){
    char *binary = NULL;
    return binary;
}
WORD *scanner(){
WORD *myword = new WORD;
myword->typenum = 10;
myword->word = "";
p_token = 0;
m_getch();
getbc();
if(letter()){
while(letter()||digit()){
concat(); //拼接單詞
m_getch(); //從輸入緩存區讀取一個字元到ch中
}
retract(); //回退一個字元
myword->typenum = reserve();
myword->word = token;
return (myword);
}
else if(digit()){
while(digit()){
concat();
m_getch();
}
retract();
myword->typenum = 17;
myword->word = token;
return (myword);
}
else switch(ch){
case '+': myword->typenum = 18;
myword->word = "+";
return (myword);
break;
case '-': myword->typenum = 19;
myword->word = "-";
return (myword);
break;
case '*': myword->typenum = 20;
myword->word = "*";
return (myword);
break;
case '#': myword->typenum = 21;
myword->word = "#";
return (myword);
break;
case '%': myword->typenum = 22;
myword->word = "%";
return (myword);
break;
case ';': myword->typenum = 31;
myword->word = ";";
return (myword);
break;
case '(': myword->typenum = 32;
myword->word = "(";
return (myword);
break;
case ')': myword->typenum = 33;
myword->word = ")";
return (myword);
break;
case '!': m_getch();
if(ch == '='){
myword->typenum = 23;
myword->word = "!=";
return (myword);
}
retract();
myword->typenum = 34;
myword->word = "!";
return (myword);
break;
case '<': m_getch();
if(ch == '='){
myword->typenum = 26;
myword->word = "<=";
return (myword);
}
if(ch == '>'){
myword->typenum = 25;
myword->word = "<>";
return (myword);
}
retract();
myword->typenum = 24;
myword->word = "<";
return (myword);
break;
case '>': m_getch();
if(ch == '='){
myword->typenum = 28;
myword->word = ">=";
return (myword);
}
retract();
myword->typenum = 27;
myword->word = ">";
return (myword);
break;
case '=': m_getch();
if(ch == '='){
myword->typenum = 30;
myword->word = "==";
return (myword);
}
retract();
myword->typenum = 29;
myword->word = "=";
return (myword);
break;
case'\0': myword->typenum=1000; //當讀取到最後一個字元串的時候 ,為 \0 ,程式會當成無法辨識的程式,故而需要一個case來處理
myword->word="OVER";
return(myword);
break;
case '/': m_getch();
if(ch == '*'){
while(1){
m_getch();
if(ch == '*'){
m_getch();
if(ch == '/')
break;
}
}
myword->typenum = 1000;
myword->word = NULL;
return (myword);
}
else if(ch == '/'){
while(1){
m_getch();
if(ch == '\n')
break;
}
myword->typenum = 1000;
myword->word = NULL;
return (myword);
}
retract();
myword->typenum = 35;
myword->word = "/";
return (myword);
break;
case '"': myword->typenum = 36;
myword->word = " “";
return (myword);
break;
default: myword->typenum = -1;
myword->word = "ERROR";
return (myword);
}
}