通过parsing-chart实现句法分析

思路来源于宗成庆老师PPT第9章线图分析法。在这里仅把算法描述放到这里，宗成庆老师的PPT上的例子对于实现该算法非常有帮助，由于例子较长，在这里就不放了，若有需要，自行查看。

PPT链接：http://www.nlpr.ia.ac.cn/cip/ZongReportandLecture/ReportandLectureIndex.htm

该算法在第九章第一部分

该算法需要三个容器：1.agenda代理表，一般用列表或数组存储即可。2.Activate_Arc:活性边，用队列或栈存储，本人用队列存储在本文中。3.chart：为一个句子长度+1乘以句子长度+1的矩阵，用以存储最后的结果

算法描述如下：

通过parsing-chart实现句法分析

代码如下：

1.队列

class Queue:
    def __init__(self):
        self.first = None
        self.last = None
        self.N = 0

    def enqueue(self,item):
        oldLast = self.last
        self.last = Node(item)
        self.last.next = None
        if self.isEmpty():
            self.first = self.last
        else:
            oldLast.next = self.last

        self.N += 1

    def unqueue(self):
        if self.isEmpty():
            raise QueueIsEmptyException('队列已经没有元素')
        item = self.first
        self.first = self.first.next
        if self.isEmpty():
            self.last = None

        self.N -= 1
        return item

    def isEmpty(self):
        if self.N == 0:
            return True


class Node:
    def __init__(self,item):
        self.item = item
        self.next = None

class QueueIsEmptyException(Exception):
    def __init__(self,errInfo):
        super.__init__(self)
        self.errorinfo = errInfo
    def __str__(self):
        return self.errorinfo

if __name__ == '__main__':
    queue = Queue()
    queue.enqueue(1)
    queue.enqueue(2)
    # print(queue)
    print(queue.N)
    item = queue.unqueue()
    print(item.item)
    print(queue.N)
    queue.unqueue()
    queue.unqueue()

2.线图分析法parsing-chart

from Parsing.Chart.LinearQueue import Queue


class parsing_chart:
    def __init__(self,voca_tag):
        queue = Queue()
        self.agenda = queue
        self.voca_tag = voca_tag
        self.ActiveArc = []
        self.chart = []
        self.rules = [['S','NP','VP',0,0],['NP','Det','N',0,0],['VP','VP','PP',0,0],['VP','V','NP',0,0],['PP','Prep','NP',0,0]]
        self.changed_rules = self.rules.copy()
        self.rowFirst = None
        self.columnFirst = None

    def parsing(self,sentence):
        print('=======开始分析句子结构=======')
        # self.agenda = LinearQueue()
        s_length = len(sentence)
        count = -1
        for i in range(s_length+1):
            self.chart.append([0]*(s_length+1))
        while(count < len(sentence)):
            if self.agenda.isEmpty():
                # word_pos = self.voca_tag[i] + '('+str(i)+','+str(i+1)+')'
                count += 1
                if count == len(sentence):
                    break
                word_pos = [self.voca_tag[count],count, count + 1]
                self.agenda.enqueue(word_pos)
            else:
                #如果符合tag X符合规则 A ->X r  则添加标记A ->X。r(i,j)，并添加至活性边列表中
                word_pos = self.agenda.unqueue().item
                for i in range(len(self.rules)):
                    rule = self.changed_rules[i]
                    if word_pos[0] == rule[1]:
                        self.add_ActiveArc(rule,word_pos)
                #扩展活性边
                self.extend_arc(word_pos,s_length,count)

        # self.get_text_struc(sentence)


    def add_ActiveArc(self,rule,word_pos):
        word = word_pos[0]
        if '。' in rule:
            rule.remove('。')

        # 有些规则如VP -> VP PP 需要将。移动到后面的VP处
        index = [i for i,x in enumerate(rule) if x == word]
        if len(index) > 1:
            real_index = index[len(index)-1]
        else:
            real_index = index[0]
        rule.insert(real_index+1,'。')
        rule[4] = word_pos[1]
        rule[5] = word_pos[2]
        self.ActiveArc.append(rule)

    def extend_arc(self,word_pos,s_length,count):
        self.chart[word_pos[1]][word_pos[2]] = word_pos[0]
 
        for i in range(len(self.ActiveArc)):
            #只有tag X符合 A -> u。X才执行以下程序
            if word_pos[0] == self.ActiveArc[i][3]:
                if 'S' == self.ActiveArc[i][0]:
                    self.chart[0][s_length] = 'S'
                else:
                    self.ActiveArc[i] =[self.ActiveArc[i][0],self.ActiveArc[i][1],self.ActiveArc[i][3],self.ActiveArc[i][2],self.ActiveArc[i][4],self.ActiveArc[i][5]]
                    self.agenda.enqueue([self.ActiveArc[i][0],self.ActiveArc[i][4],word_pos[2]])

    def get_text_struc(self,sentence):
        print('=======句子结构如下=======')
        # print(self.chart)
        for i in range(len(self.chart)-1):
            for j in range(len(self.chart)):
                if self.chart[i][j] != 0:
                    print(str(sentence[i:j])+ ':' + self.chart[i][j])

    def get_structure_tree(self,sentence,row,column):
        col_value = None
        row_value = None
        n_row1 = None
        n_col1 = None
        n_row2 = None
        n_col2 =None
        if row is None or column is None:
            return

        curr_tag = self.chart[row][column]

        for i in range(column-1,-1,-1):
            if self.chart[row][i] != 0:
                row_value = self.chart[row][i]
                # print(row_value)
                n_row1 = row
                n_col1 = i
                break

        for r in range(row+1,len(self.chart) - 1):
            if self.chart[r][column] != 0:
                col_value = self.chart[r][column]
                # print(col_value)
                n_row2 = r
                n_col2 = column
                break

        if col_value is not None and row_value is not None:
            print(' '.join(sentence[row:column])+'/'+curr_tag + '  ->  ' + ' '.join(sentence[n_row1:n_col1]) + '/' + row_value + '   ' + ' '.join(sentence[n_row2:n_col2]) + '/' + col_value)

        self.get_structure_tree(sentence,n_row1,n_col1)
        self.get_structure_tree(sentence,n_row2,n_col2)




if __name__ == '__main__':
    sentence = ['the', 'boy', 'hits', 'the', 'dog', 'with', 'a', 'rod']
    voca_tag = ['Det','N','V','Det','N','Prep','Det','N']
    pc = parsing_chart(voca_tag)
    pc.parsing(sentence)
    nrow = 0
    ncol = len(pc.chart)-1
    pc.get_structure_tree(sentence,nrow,ncol)

3.结果

通过parsing-chart实现句法分析

4.总结

此次代码实现浪费太多时间。。因为没有太研究例子，而仅仅根据算法写代码，写的总是有问题，之后详细做了下PPT上的例子，问题便迎刃而解，以前看过一个童鞋说过：多读书、多总结、少写代码十分有道理，先把思路理清，代码二点实现便水到渠成了

通过parsing-chart实现句法分析

继续阅读

计算文本相似度方法总结（二）

#yyds干货盘点#——如何通俗理解AST抽象语法树(一)

Constituent Parsing & Dependency Parsing 句法分析简介

李宏毅自然语言处理——依存句法分析

李宏毅自然语言处理——成分句法分析

论文笔记：Ordered Neurons: Integrating Tree Structures into Recurrent Neural Networks论文笔记：Ordered Neurons: Integrating Tree Structures into Recurrent Neural Networks导语摘要1 简介2 相关工作3 有序神经元4 ON-LSTM

论文笔记：Straight to the Tree: Constituency Parsing with Neural Syntactic Distance论文笔记：Straight to the Tree: Constituency Parsing with Neural Syntactic Distance导语摘要1 简介2 句法解析树的句法距离3 学习句法距离4 实验5 相关工作6 总结

论文笔记：Neural Language Modeling by Jointly Learning Syntax and Lexicon论文笔记：Neural Language Modeling by Jointly Learning Syntax and Lexicon导语摘要1 简介2 相关工作3 研究动机4 句法结构建模5 语言模型6 实验7 结论

pyltp实现句法分析并画出句法图

Transition-based Parsing 简介

所有文章汇总

PCFG构造语法结构树相关问题思考

【CS224n】(assignment3)Dependency Parsing

哈工大SCIR：ACL 2010-2020研究趋势总结

【NLP】LTP中文工具集使用