
# Source code:
import turtle
## Global variables ##
# Number of top-frequency words to display
count = 10
# Word frequencies - used as the y-axis data
data = []
# Words - used as the x-axis data
words = []
# Y-axis display scale factor - can be tuned to the word-frequency magnitudes
yScale = 6
# X-axis display scale factor - can be tuned to the value of count
xScale = 30
################# Turtle Start ####################
def drawLine(t, x1, y1, x2, y2):
    """Draw a straight segment from (x1, y1) to (x2, y2).

    Args:
        t: Turtle-like object providing penup(), pendown() and goto().
        x1: X coordinate of the start point.
        y1: Y coordinate of the start point.
        x2: X coordinate of the end point.
        y2: Y coordinate of the end point.
    """
    # Lift the pen so travelling to the start point leaves no trace.
    t.penup()
    t.goto(x1, y1)
    # Lower the pen so the move to the end point draws the segment.
    t.pendown()
    t.goto(x2, y2)
def drawText(t, x, y, text):
    """Write ``text`` at the coordinate (x, y).

    Args:
        t: Turtle-like object providing penup(), pendown(), goto() and
            write().
        x: X coordinate where the text is written.
        y: Y coordinate where the text is written.
        text: Value handed unchanged to ``t.write``.
    """
    # Move with the pen up so no line is drawn on the way to (x, y).
    t.penup()
    t.goto(x, y)
    t.pendown()
    t.write(text)
def drawGraph(t):
    """Draw the axes, the per-word labels and the bars of the chart.

    Reads the module-level ``words`` and ``data`` lists together with the
    ``count``, ``xScale`` and ``yScale`` settings.

    Args:
        t: Turtle used for all drawing.
    """
    # Draw the x and y axis lines.
    drawLine(t, 0, 0, 360, 0)
    drawLine(t, 0, 300, 0, 0)
    # X axis: one label per word, with its frequency written above the bar.
    # Pair up only the entries that exist; indexing blindly up to ``count``
    # raised IndexError when fewer than ``count`` words had been collected.
    # Slots start at 1 so nothing is drawn on the origin.
    pairs = zip(words[:count], data[:count])
    for slot, (word, frequency) in enumerate(pairs, start=1):
        label_x = slot * xScale - 4
        drawText(t, label_x, -20, word)
        drawText(t, label_x, frequency * yScale + 10, frequency)
    drawBar(t)
def drawRectangle(t, x, y):
    """Draw the outline of a single bar standing on the x axis.

    Args:
        t: Turtle used for drawing.
        x: Bar position in unscaled units; multiplied by ``xScale``.
        y: Bar height in unscaled units; multiplied by ``yScale``.
    """
    # Scale the logical position/height to drawing coordinates; the bar is
    # 10 units wide, centred on the scaled x position.
    centre = x * xScale
    top = y * yScale
    left = centre - 5
    right = centre + 5
    drawLine(t, left, 0, left, top)
    drawLine(t, left, top, right, top)
    drawLine(t, right, top, right, 0)
    drawLine(t, right, 0, left, 0)
def drawBar(t):
    """Draw one bar for each of the first ``count`` entries of ``data``.

    Args:
        t: Turtle used for drawing.
    """
    # Slice instead of indexing up to ``count`` so that having fewer than
    # ``count`` frequencies draws fewer bars rather than raising IndexError.
    for position, frequency in enumerate(data[:count], start=1):
        drawRectangle(t, position, frequency)
################# Turtle End ####################
def processLine(line, wordCounts):
    """Add the words of one line of text to the running frequency table.

    Args:
        line: A single line of text.
        wordCounts: Dict mapping word -> occurrence count; updated in place.
    """
    # Replace punctuation with spaces, then split on whitespace to get words.
    for word in replacePunctuations(line).split():
        wordCounts[word] = wordCounts.get(word, 0) + 1
# Characters treated as punctuation. The previous literal ended in \'"""
# (a string followed by an empty string), so the double quote was never
# actually part of the set; it and the missing "!" are included here.
_PUNCTUATION = "~!@#$%^&*()_-+=<>?/,.:;{}[]|'\""
# Translation table mapping every punctuation character to a space.
_PUNCTUATION_TABLE = str.maketrans(_PUNCTUATION, " " * len(_PUNCTUATION))


def replacePunctuations(line):
    """Return ``line`` with every punctuation character replaced by a space.

    Args:
        line: Text to clean.

    Returns:
        A string of the same length as ``line`` in which each character of
        the punctuation set has been replaced by a single space.
    """
    # One pass over the string instead of one str.replace per character.
    return line.translate(_PUNCTUATION_TABLE)
def main():
    """Prompt for a text file, print its most frequent words and chart them.

    Reads a file name from standard input, counts the lower-cased words of
    the file, prints up to ``count`` "word<TAB>frequency" lines in descending
    frequency order, stores them in the module-level ``words``/``data``
    lists and draws the bar chart with turtle.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Ask the user for a file name.
    filename = input("enter a filename:").strip()
    # Empty dictionary used to accumulate the word frequencies.
    wordCounts = {}
    # ``with`` guarantees the file is closed even if processing a line fails.
    with open(filename, "r") as infile:
        for line in infile:
            processLine(line.lower(), wordCounts)
    # Rank as (frequency, word) pairs, highest first. Ties fall back to the
    # word in descending order, the same order the previous ascending sort
    # walked backwards produced.
    ranked = sorted(
        ((frequency, word) for word, frequency in wordCounts.items()),
        reverse=True,
    )
    # Empty the shared lists in place so a repeated call does not pile new
    # results on top of old ones.
    del data[:]
    del words[:]
    # Slicing caps the output at ``count`` entries; with fewer distinct words
    # the old index arithmetic ran into negative indices, which repeated
    # entries and then raised IndexError.
    for frequency, word in ranked[:count]:
        print(word + "\t" + str(frequency))
        data.append(frequency)
        words.append(word)
    # Draw the bar chart from the collected results.
    turtle.title('詞頻結果柱狀圖')
    turtle.setup(900, 750, 0, 0)
    t = turtle.Turtle()
    t.hideturtle()
    t.width(3)
    drawGraph(t)
# Call main() only when this file is executed as a script.
if __name__ == '__main__':
    main()
# Original article URL:
# http://www.icourse163.org/learn/BIT-268001?tid=1002001005#/learn/content?type=detail&id=1002613039&cid=1002856109