基于PLY的解釋器——實作了常見的文法

本篇部落格是接着上兩篇部落格講解的，

https://blog.csdn.net/shixiongtao/article/details/104059437

https://blog.csdn.net/shixiongtao/article/details/104071621。

首先給出代碼的連結：https://download.csdn.net/download/shixiongtao/12119227。

程式分為，Lexer.py，Parser.py，Ast.py，和主程式run.py組成。

檔案結構如下：

├── LearnPLY
│   ├── Ast.py
│   ├── Lexer.py
│   ├── parser.out
│   ├── Parser.py
│   └── parsetab.py
├── run_lexer.py
├── run.py
└── test.ply

Ast.py中定義了抽象文法樹，Lexer.py定義了詞法結構，Parser.py定義了文法結構，parser.out和parsetab.py是自動生成的檔案，run_lexer.py是單獨測試Lexer.py的程式，run.py是主程式，test.ply是待解釋的程式。

首先來看Lexer.py檔案：

###   導入包   ###
import ply.lex as lex

###   是否列印詳細資訊   ###
LexerDebug = False

###   定義詞法解析類   ###
class Lexer(object):
    def __init__(self):
        self.lexer = lex.lex(object=self)

    def input(self, text):
        self.lexer.input(text)

    def token(self):
        return self.lexer.token()

    ###   定義保留字，字典，靜态變量   ###
    reserved = {
        'print': 'PRINT',

        'if': 'IF',
        'else': 'ELSE',
        'for': 'FOR',
        'while': 'WHILE',

        'break': 'BREAK',
        'continue': 'CONTINUE',
        
        'and': 'AND',
        'or': 'OR',

        'return': 'RETURN'
        }

    ###   定義token，靜态變量   ###
    tokens = [
        #變量名
        'NAME',
        #整數，浮點數，字元串
        'INT', 'FLOAT', 'STRING',
        #加，減，乘，除，模，指派
        'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', 'ASSIGN',
        #(，)，[，]，{，}
        'LPAREN', 'RPAREN', 'LSQBRACK', 'RSQBRACK', 'LBRACK', 'RBRACK', 
        #大于，大于等于，小于，小于等于，等于，不等于，真，假，空
        'GT', 'GTE', 'LT', 'LTE', 'EQ', 'NEQ', 'TRUE', 'FALSE', 'NONE',
        #冒号，分号，逗号，注釋
        'COLON', 'SEMICOLON', 'COMMA', 'COMMENT',
        ]
    ###   将保留字和token合并到一塊查詢，靜态變量   ###
    tokens = tokens + list(reserved.values())

    ###   聲明正規表達式，順序代表了優先級，靜态變量   ###
    #省略空格
    t_ignore = ' \t'              
    #特殊符号，需要轉義
    #t_PLUS = '\+'
    def t_PLUS(self, t):
        '\+'
        if LexerDebug == True: print('t_PLUS')
        return t
   #t_MINUS = '-'
    def t_MINUS(self, t):
         '-'
         if LexerDebug == True: print('t_MINUS')
         return t
    #特殊符号，需要轉義
    #t_TIMES = '\*'
    def t_TIMES(self, t):
        '\*'
        if LexerDebug == True: print('t_TIMES')
        return t
    #t_DIVIDE = '/'
    def t_DIVIDE(self, t):
        '/'
        if LexerDebug == True: print('t_DIVIDE')
        return t
    #t_MOD = '%'
    def t_MOD(self, t):
        '%'
        if LexerDebug == True: print('t_DIVIDE')
        return t
    #特殊符号，需要轉義
    #t_LPAREN = '\('
    def t_LPAREN(self, t):
        '\('
        if LexerDebug == True: print('t_LPAREN')
        return t
    #特殊符号，需要轉義
    #t_RPAREN = '\)'
    def t_RPAREN(self, t):
        '\)'
        if LexerDebug == True: print('t_RPAREN')
        return t
    #特殊符号，需要轉義
    #t_LSQBRACK = '\['
    def t_LSQBRACK(self, t):
        '\['
        if LexerDebug == True: print('t_LSQBRACK')
        return t
    #特殊符号，需要轉義
    #t_RSQBRACK = '\]'
    def t_RSQBRACK(self, t):
        '\]'
        if LexerDebug == True: print('t_RSQBRACK')
        return t
    #特殊符号，需要轉義
    #t_LBRACK = '\{'
    def t_LBRACK(self, t):
        '\{'
        if LexerDebug == True: print('t_LBRACK')
        return t
    #特殊符号，需要轉義
    #t_RBRACK = '\}'
    def t_RBRACK(self, t):
        '\}'
        if LexerDebug == True: print('t_RBRACK')
        return t
    #t_EQ = '=='
    def t_EQ(self, t):
        '=='
        if LexerDebug == True: print('t_EQ')
        return t
    #t_ASSIGN = '='
    def t_ASSIGN(self, t):
        '='
        if LexerDebug == True: print('t_ASSIGN')
        return t
    #t_GTE = '>='
    def t_GTE(self, t):
        '>='
        if LexerDebug == True: print('t_GTE')
        return t
    #t_GT = '>'
    def t_GT(self, t):
        '>'
        if LexerDebug == True: print('t_GT')
        return t
    #t_LTE = '<='
    def t_LTE(self, t):
        '<='
        if LexerDebug == True: print('t_LTE')
        return t
    #t_LT = '<'
    def t_LT(self, t):
        '<'
        if LexerDebug == True: print('t_LT')
        return t
    #t_NEQ = '!='
    def t_NEQ(self, t):
        '!='
        if LexerDebug == True: print('t_NEQ')
        return t
    #t_COLON = ':'
    def t_COLON(self, t):
        ':'
        if LexerDebug == True: print('t_COLON')
        return t
    #t_SEMICOLON = ';'
    def t_SEMICOLON(self, t):
        ';'
        if LexerDebug == True: print('t_SEMICOLON')
        return t
    #t_COMMA = ','
    def t_COMMA(self, t):
        ','
        if LexerDebug == True: print('t_COMMA')
        return t
    #COMMENT = '\#.*'
    def t_COMMENT(self, t):
        '\#.*'
        #省略
        pass
    
    def t_newline(self, t):
        r'\n+'
        if LexerDebug == True: print('t_newline')
        t.lexer.lineno += len(t.value)

    def t_TRUE(self, t):
        'True'
        if LexerDebug == True: print('t_TRUE')
        t.value = True
        return t

    def t_FALSE(self, t):
        'False'
        if LexerDebug == True: print('t_FALSE')
        t.value = False
        return t

    def t_NONE(self, t):
        'None'
        if LexerDebug == True: print('t_NONE')
        t.value = None
        return t
    
    def t_NAME(self, t):
        r'[a-zA-Z_][a-zA-Z_0-9]*'
        #檢查是不是關鍵詞，如果不是，那麼預設傳回，NAME，
        t.type = Lexer.reserved.get(t.value, 'NAME')
        if LexerDebug == True: print('t_', t.type, ', value = ', t.value)
        return t

    def t_FLOAT(self, t):
        r'\d*\.\d+'
        t.value = float(t.value)
        if LexerDebug == True: print('t_FLOAT, value = ', t.value)
        return t

    def t_INT(self, t):
        r'\d+'
        t.value = int(t.value)
        if LexerDebug == True: print('t_INT, value = ', t.value)
        return t

    def t_STRING(self, t):
        r'\'(?:\\"|.)*?\''
        t.value = bytes(t.value.lstrip("'").rstrip("'"), "utf-8").decode("unicode_escape")
        if LexerDebug == True: print('t_STRING, value = ', t.value)
        return t

    def t_error(self, t):
        print("Lexer error = Illegal character '%s' at line '%d'" % (t.value[0], t.lexer.lineno))
        t.lexer.skip(1)

    def test(self, text):
        # Give the lexer some input
        self.lexer.input(text)
        for tok in self.lexer:
            print(tok)

在這個檔案中定義了如何将字元串比對為定義好的token。

其次是Parser.py，在這個檔案中定義了，如何将token識别為表達式，進而建構抽象文法樹，最終執行程式，代碼如下：

###   導入包   ###
import LearnPLY.Ast as AST
from LearnPLY.Lexer import Lexer

###   總列印次數   ###
printPNum = 0
###   是否列印詳細資訊   ###
ParserDebug = False

###   定義列印   ###
def printP(op, p):
    global printPNum;
    printPNum = printPNum + 1
    if ParserDebug == False : return
    print('-----step=%d-----op=\'%s\'-----' %(printPNum, op))
    for i in range(len(p)):
        print('op=\'%s\'---pos=\'%d\'---val=\'%s\'' %(op, i, p[i]))
        #print(op, 'pos = ', i, 'value', p[i])

###   定義文法解析類   ###
class Parser(object):
    ###   全局token，靜态變量   ###
    tokens = Lexer.tokens
    ###   操作符優先級定義，靜态變量   ###
    precedence = (
        #無結合性操作符
        ('nonassoc', 'LT', 'GT', 'LTE', 'GTE'),
        #左結合性操作符
        #先執行加法，是以加法在減法下面
        ('left', 'MINUS'),
        ('left', 'PLUS'),
        #先執行乘法，是以乘法在除法下面
         ('left', 'DIVIDE'),
        ('left', 'TIMES'),
        ('left', 'MOD'),
        #邏輯操作
        ('left', 'AND'),
        ('left', 'OR'),
        ('left', 'EQ'),
        ('left', 'NEQ'),
        #右結合性操作符
        ('right', 'UMINUS'),
    )

    ###   24.程式子產品定義   ###
    def p_stmt_list(p):
        """stmt_list : stmt_list stmt
                    | stmt
        """
        if len(p) == 2:
            p[0] = AST.StatementList()
            p[0].add_statement(p[1])
        else:
            p[1].add_statement(p[2])
            p[0] = p[1]
        printP('p_stmt_list', p)
    ###   23.WHILE定義   ###
    def p_stmt_while(p):
        """stmt : WHILE expr COLON stmt_suite
                    | WHILE expr COLON stmt_suite ELSE COLON stmt_suite
        """
        if len(p) == 5:
            p[0] = AST.While(p[2], p[4], None)
        else:
            p[0] = AST.While(p[2], p[4], p[7])
        printP('p_stmt_while', p)
    ###   22.IF定義   ###
    def p_stmt_if(p):
        """stmt : IF expr COLON stmt_suite
                    | IF expr COLON stmt_suite ELSE COLON stmt_suite
        """
        if len(p) == 5:
            p[0] = AST.If(p[2], p[4], None)
        else:
            p[0] = AST.If(p[2], p[4], p[7])
        printP('p_stmt_if', p)
    ###   21.程式{}定義   ###
    def p_suite(p):
        """stmt_suite : LBRACK stmt_list RBRACK"""
        p[0] = p[2]
        printP('p_suite', p)
    ################################################
    ###   20.列印   ###
    def p_expr_print_expr(p):
        """stmt : PRINT LPAREN expr RPAREN"""
        p[0] = AST.Print(p[3])
        printP('p_expr_print_expr', p)
    ###   19.變量   ###
    def p_expr_name(p):
        """expr : NAME"""
        p[0] = AST.Name(p[1])
        printP('p_expr_name', p)
    ###   18.指派   ###
    def p_expr_assign(p):
        """stmt : NAME ASSIGN expr"""
        p[0] = AST.Assign(p[1], p[3])
        printP('p_expr_assign', p)
    ###   17.括号   ###
    def p_expr_group(p):
        """expr : LPAREN expr RPAREN"""
        p[0] = p[2]
        printP('p_expr_group', p)
    ###   16.or   ###  
    def p_expr_or(p):
        """expr : expr OR expr"""
        p[0] = AST.BoolOpOr(p[1], p[3])
        printP('p_expr_or', p)
    ###   15.and   ###xxx
    def p_expr_and(p):
        """expr : expr AND expr"""
        p[0] = AST.BoolOpAnd(p[1], p[3])
        printP('p_expr_and', p)
    ###   14.!=   ###
    def p_expr_neq(p):
        """expr : expr EQ expr"""
        p[0] = AST.CompareOpNeq(p[1], p[3])
        printP('p_expr_neq', p)
    ###   14.==   ###
    def p_expr_eq(p):
        """expr : expr NEQ expr"""
        p[0] = AST.CompareOpEq(p[1], p[3])
        printP('p_expr_eq', p)
    ###   14.<=   ###
    def p_expr_lte(p):
        """expr : expr LTE expr"""
        p[0] = AST.CompareOpLte(p[1], p[3])
        printP('p_expr_lte', p)
    ###   13.<   ###
    def p_expr_lt(p):
        """expr : expr LT expr"""
        p[0] = AST.CompareOpLt(p[1], p[3])
        printP('p_expr_lt', p)
    ###   12.>=   ###
    def p_expr_gte(p):
        """expr : expr GTE expr"""
        p[0] = AST.CompareOpGte(p[1], p[3])
        printP('p_expr_gte', p)
    ###   11.>   ###
    def p_expr_gt(p):
        """expr : expr GT expr"""
        p[0] = AST.CompareOpGt(p[1], p[3])
        printP('p_expr_gt', p)
    ###   10.負号   ###
    def p_expr_uminus(p):
        """expr : MINUS expr %prec UMINUS"""
        p[0] = AST.BinOpTimes(AST.Number(-1), p[2])
        printP('p_expr_uminus', p)
    ###   9.取模  ###
    def p_expr_mod(p):
        """expr : expr MOD expr"""
        p[0] = AST.BinOpMod(p[1], p[3])
        printP('p_expr_mod', p)
    ###   8.除法  ###
    def p_expr_divide(p):
        """expr : expr DIVIDE expr"""
        p[0] = AST.BinOpDivde(p[1], p[3])
        printP('p_expr_divide', p)
    ###   7.乘法  ###
    def p_expr_times(p):
        """expr : expr TIMES expr"""
        p[0] = AST.BinOpTimes(p[1], p[3])
        printP('p_expr_times', p)
    ###   6.減法  ###
    def p_expr_minus(p):
        """expr : expr MINUS expr"""
        p[0] = AST.BinOpMinus(p[1], p[3])
        printP('p_expr_minus', p)
    ###   5.加法  ###
    def p_expr_plus(p):
        """expr : expr PLUS expr
        """
        p[0] = AST.BinOpPlus(p[1], p[3])
        printP('p_expr_plus', p)
    ###   4.STRING轉表達式   ###
    def p_expr_string(p):
        """expr : STRING"""
        #建構AST
        p[0] = AST.Str(p[1])
        printP('p_expr_string', p)
    ###   3.CONST轉表達式   ###
    def p_expr_const(p):
        """expr : TRUE
                | FALSE
                | NONE
        """
        p[0] = AST.Const(p[1])
        printP('p_expr_const', p)
    ###   2.FLOAT轉表達式   ###
    def p_expr_float(p):
        """expr : FLOAT"""
        #建構AST
        p[0] = AST.Number(p[1])
        printP('p_expr_float', p)
    ###   1.INT轉表達式   ###
    def p_expr_int(p):
        """expr : INT"""
        #建構AST
        p[0] = AST.Number(p[1])
        printP('p_expr_int', p)
    ###   異常函數   ###
    def p_error(p):
        if p:
            print("Parser error = value  = '%s', type = '%s', lineno = '%d'" % (p.value, p.type, p.lineno))
        else:
            print("Parser error at EOF")

最後是Ast.py，定義了抽象文法樹的各個節點。

給出run.py程式，

###   導入包   ###
from LearnPLY.Lexer import Lexer
from LearnPLY.Parser import Parser
import ply.yacc as yacc

###   程式示例   ###
textF = open('test.ply')
text = textF.read()

###   解釋器   ###
parser = yacc.yacc(module=Parser)
print('===============LEX===============')
ast = parser.parse(text, lexer=Lexer())
print('===============AST===============')
if ast == None:
    print('AST == None')
else:
    print(ast.getTree())
print('===============EXE===============')
if ast == None:
    print('AST == None')
else:
    ast.exe()
print('===============IDS===============')
from LearnPLY.Ast import ids
print('ids = ', ids)
print('===============END===============')
from LearnPLY.Parser import printPNum
print('printPNum = ', printPNum)

最後給出待解釋的程式，test.ply

###   程式示例   ###
print('test start')
a = 1
b = a + 2 - 3.4 * 5.6 / 7.8
print('b = ')
print(b)
print('str123')
c = 3 % 2
print('c = ')
print(c)
print('-c')
print(-c)
d = 1 > 2
d = 1 >= 2
d = 1 < 2
d = 1 <= 2
d = 1 == 2
d = 1 != 2
d = True and True
d = False or True
print('d = ')
print(d)
print('(d) = ')
print((d))
print('---------------')
if (1 < 2):
{
	print('(1 < 2) = ok')
}else:
{
	print('(1 < 2) = nok')
}
e = 5
while (e > 0):
{
	print('while e = ')
	print(e)
	e = e - 1
}
print('test end')

執行結果為：

>>> 
===============LEX===============
===============AST===============
PRINT 
| test start
=
| a
| 1
=
| b
| -
| | +
| | | a
| | | 2
| | /
| | | *
| | | | 3.4
| | | | 5.6
| | | 7.8
PRINT 
| b = 
PRINT 
| b
PRINT 
| str123
=
| c
| %
| | 3
| | 2
PRINT 
| c = 
PRINT 
| c
PRINT 
| -c
PRINT 
| *
| | -1
| | c
=
| d
| >
| | 1
| | 2
=
| d
| >=
| | 1
| | 2
=
| d
| <
| | 1
| | 2
=
| d
| <=
| | 1
| | 2
=
| d
| !=
| | 1
| | 2
=
| d
| ==
| | 1
| | 2
=
| d
| and
| | True
| | True
=
| d
| or
| | False
| | True
PRINT 
| d = 
PRINT 
| d
PRINT 
| (d) = 
PRINT 
| d
PRINT 
| ---------------
IF
| <
| | 1
| | 2
| PRINT 
| | (1 < 2) = ok
| PRINT 
| | (1 < 2) = nok
=
| e
| 5
WHILE
| >
| | e
| | 0
| PRINT 
| | while e = 
| PRINT 
| | e
| =
| | e
| | -
| | | e
| | | 1
PRINT 
| test end
===============EXE===============
test start
b = 
0.5589743589743592
str123
c = 
1
-c
-1
d = 
True
(d) = 
True
---------------
(1 < 2) = ok
while e = 
5
while e = 
4
while e = 
3
while e = 
2
while e = 
1
test end
===============IDS===============
ids =  {'a': 1, 'e': 0, 'b': 0.5589743589743592, 'd': True, 'c': 1}
===============END===============
printPNum =  138
>>>

通過這個小程式，可以比較清晰的認識解釋器的執行原理。

基于PLY的解釋器——實作了常見的文法

繼續閱讀

來自python的【條件控制/語句循環/break/continue/else/pass】一、條件控制二、語句循環

無法解析的外部符号 wmain，該符号在函數 "void cdecl mainCRTStartupHelper(struct HINSTANCE *,unsigned short con......

TestLink導出用例轉換工具(XML2Excel)

YAML簡介和PyYAML安全操作YAML支援的類型YAML的優點：yaml的基本文法python操作

Small tricks

libsvm for python 安裝

學習軟體測試基礎測試第七天

Zeppelin 配置通路 REST APIApache Zeppelin Configuration REST API

【Torch】最簡潔logging使用指南

27. Remove Element(清單)題目代碼

Cloud Studio初體驗

使用 ctypes 進行 Python 和 C 的混合程式設計

【python】【資料處理】畫多元資料分布圖

【python】netconf協定對接管理裝置

「Python 網絡自動化」NETCONF —— Python 使用 NETCONF 管理配置 H3C 網絡裝置

在python中建立excel并寫入