天天看點

編譯原理上機實戰---詞法分析

詞法分析

前一段時間一直在忙上機的事情,連續2周上機四次,現在才有時間整理

廢話不多說,上思路:

首先,我以下面這張「單詞符號與種別碼」對照表作為分類標準。

/*
單詞符号	種别碼			單詞符号	種别碼			單詞符号	種别碼
begin		1				if			2				then		3
while		4				do			5				end			6
main		7				int			8				float		9
for			10				else		11				double		12
char		13				break		14				continue	15
標識符		16				整數			17				+			18
-			19				*			20				#			21
%			22				!=			23				<			24
<>			25				<=			26				>			27
>=			28				=			29				==			30
;			31				(			32				)			33
!			34				/			35				"			36
*/
           

當然也可以繼續添加新的單詞符號,只需要在 switch-case 裡面添加對應的 case 即可;

寫代碼的時候需要注意的是 ,分析每一個類别是否要進入main裡面的分析循環

代碼如下:

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define _KEY_WORD_END "waiting for your expanding" /*定義結束關鍵字*/ 
#define MAX_SIZE 225
/*
 * One token produced by scanner().
 *
 * typenum  numeric category code of the token. The scanner in this file
 *          uses -1 for an unrecognized character and 1000 for end of input.
 * word     text of the token. It is only ever pointed at string literals or
 *          at the shared token buffer, so it is declared const: nothing may
 *          write through it, and the struct never owns the text.
 */
typedef struct {
	int typenum;
	const char *word;
}WORD; 

/* Forward declarations for the scanner and its helpers (all defined below). */
WORD *scanner(void);		/* lexical scanner: returns the next token */
char m_getch(void);
void getbc(void);
void concat(void);
int letter(void);
int digit(void);
int reserve(void);
void retract(void);
char *dbt(void);			/* was declared as `dtb`, a name no definition matched */
 

char input[MAX_SIZE];		// input buffer: the source text to be scanned
char token[MAX_SIZE]="";		// token buffer: the lexeme currently being assembled
int p_input;				// index of the next unread character in input
int p_token;				// index of the next free slot in token

/*
單詞符号	種别碼			單詞符号	種别碼			單詞符号	種别碼
bengin		1				if			2				then		3
while		4				do			5				end			6
main		7				int			8				float		9
for			10				else		11				double		12
char		13				break		14				continue	15
辨別符		16				整數		17				+			18
-			19				*			20				#			21
%			22				!=			23				<			24
<>			25				<=			26				>			27
>=			28				=			29				==			30
;			31				(			32				)			33
!			34				/			35				"			36

程式能識别注釋,且将其過濾掉 
*/
char ch;					//目前讀入字元
char * rwtab[] = {"begin","if","then","while","do","end","main","int","float","for","else","double","char","break","continue","printf"} ;
 
int main(){
	
	int over = 1;
	WORD * oneword = new WORD;//已經測試可以使用 new
	printf("Enter your words(end with $):\n");
	scanf("%[^$]s",input); 
	p_input = 0;
	printf("\nYour words : \n%s\n",input);
	while(over < 1000 && over != -1){		
		oneword = scanner();	//獲得一個單詞 		
		if(oneword->typenum < 1000)
			printf("(% d, % s)\n",oneword->typenum,oneword->word);//列印種别碼和自生的值
		over = oneword->typenum;			
	}
	printf("\n press $ to exit:");
	scanf("%[^$]s",input);

	return 0;
} 

/*
 * Fetches the character at the input cursor into the global `ch`,
 * moves the cursor one position forward, and returns that character.
 */
char m_getch(){
	ch = input[p_input++];
	return ch;
} 

void getbc(){					//去掉空白部分 
	while(ch==' '||ch == 10){
		ch = input[p_input];
		p_input += 1;
	}
} 

void concat(){					//拼接單詞 
	token[p_token] = ch;	
	p_token += 1;
	token[p_token] = '\0';
} 

/*
 * Reports whether the current character `ch` is an ASCII letter.
 * Returns 1 for 'a'..'z' or 'A'..'Z', otherwise 0.
 */
int letter(){
	int is_lower = (ch >= 'a' && ch <= 'z');
	int is_upper = (ch >= 'A' && ch <= 'Z');

	return (is_lower || is_upper) ? 1 : 0;
} 

/*
 * Reports whether the current character `ch` is a decimal digit.
 * Returns 1 for '0'..'9', otherwise 0.
 */
int digit(){
	return ('0' <= ch && ch <= '9') ? 1 : 0;
}

/*
 * Looks the current token up in the reserved-word table.
 *
 * Returns the keyword's category code (table index + 1) when `token`
 * matches an entry, otherwise 16, the identifier code.
 *
 * The whole table is searched. The previous loop stopped once the index
 * exceeded strlen("begin"), so only the first seven keywords could match.
 * The search is bounded by the array size and also stops at the
 * _KEY_WORD_END marker if the table contains one.
 */
int reserve(){
	const size_t count = sizeof rwtab / sizeof rwtab[0];

	for(size_t i = 0; i < count; i++){
		if(strcmp(rwtab[i], _KEY_WORD_END) == 0)
			break;
		if(strcmp(rwtab[i], token) == 0)
			return (int)i + 1;
	}
	return 16;
}

/*
 * Pushes one character back: moves the input cursor one position to the
 * left so the character just read will be read again.
 */
void retract(){
	--p_input;
} 

/*
 * Placeholder for a number-to-binary conversion.
 * Not implemented: takes no input and always returns NULL.
 */
char *dbt(){					//convert a number to binary (stub)
	return NULL;
} 

WORD *scanner(){
	WORD *myword = new WORD;
	myword->typenum = 10;
	myword->word = "";
	p_token = 0;
	m_getch();
	getbc();
		
	if(letter()){
		while(letter()||digit()){
			concat();		//拼接單詞 
			m_getch();		//從輸入緩存區讀取一個字元到ch中 
		}
		retract();			//回退一個字元
		myword->typenum = reserve();
		myword->word = token;
		return (myword);		
	}
	else if(digit()){
		while(digit()){
			concat();
			m_getch();
		}
		retract();
		myword->typenum = 17;
		myword->word = token;
		return (myword);
	}
	else switch(ch){
		case '+':	myword->typenum = 18;
					myword->word = "+";
					return (myword);
					break;
		case '-':	myword->typenum = 19;
					myword->word = "-";
					return (myword);
					break;
		case '*':	myword->typenum = 20;
					myword->word = "*";
					return (myword);
					break;
		case '#':	myword->typenum = 21;
					myword->word = "#";
					return (myword);
					break;
		case '%':	myword->typenum = 22;
					myword->word = "%";
					return (myword);
					break;
		case ';':	myword->typenum = 31;
					myword->word = ";";
					return (myword);
					break;
		case '(':	myword->typenum = 32;
					myword->word = "(";
					return (myword);
					break;
		case ')':	myword->typenum = 33;
					myword->word = ")";
					return (myword);
					break;
		case '!':	m_getch();
					if(ch == '='){
						myword->typenum = 23;
						myword->word = "!=";
						return (myword);
					}
					retract();
					myword->typenum = 34;
					myword->word = "!";
					return (myword);
					break;
		case '<':	m_getch();
					if(ch == '='){
						myword->typenum = 26;
						myword->word = "<=";
						return (myword);
					}
					if(ch == '>'){
						myword->typenum = 25;
						myword->word = "<>";
						return (myword);
					}
					retract();
					myword->typenum = 24;
					myword->word = "<";
					return (myword);
					break;
		case '>':	m_getch();
					if(ch == '='){
						myword->typenum = 28;
						myword->word = ">=";
						return (myword);
					}
					retract();
					myword->typenum = 27;
					myword->word = ">";
					return (myword);
					break;
		case '=':	m_getch();
					if(ch == '='){
						myword->typenum = 30;
						myword->word = "==";
						return (myword);
					}
					retract();
					myword->typenum = 29;
					myword->word = "=";
					return (myword);
					break;
		case'\0': myword->typenum=1000;			//當讀取到最後一個字元串的時候 ,為 \0 ,程式會當成無法辨識的程式,故而需要一個case來處理 
                  myword->word="OVER";
                  return(myword);
                  break;
        case '/': m_getch();
    			  if(ch == '*'){
    			  	while(1){
    			  		m_getch();
    			  		if(ch == '*'){
    			  			m_getch();
    			  			if(ch == '/') 
								break;
						}
					  }
					myword->typenum = 1000;
					myword->word = NULL;
					return (myword);
				  }
				  else if(ch == '/'){
				  	while(1){
				  		m_getch();
						if(ch == '\n')
							break; 
					}
					myword->typenum = 1000;
					myword->word = NULL;
					return (myword);
				  }
				  retract();
				  myword->typenum = 35;
				  myword->word = "/";
				  return (myword);
				break;
		case '"': myword->typenum = 36;
		
				  myword->word = " “";
				  return (myword);
				  break;
		default: myword->typenum = -1;
				 myword->word = "ERROR";
				 return (myword);
	}
}