假設文本檔案中儲存的是形如矩陣的資料,但由於文本檔案的格式所限,打開檔案後按行讀取到的類型是字串,不方便後續按數值處理,所以需要進行類型轉換。參考網路資料並在Python 2.7.5中調試後,總結為以下兩種方式:
(1)已知矩陣列數但行數未知(讀取檔案時擷取行數)時:
from numpy import *
from FileDialog import *
import tkFileDialog
def txt2RealMat(filename=None, initDataCol=5):
    """Read a tab-separated text file into a 2-D float array.

    The number of rows is taken from the file; the number of columns is
    fixed in advance.

    Args:
        filename: Path of the text file. When None (the default), an
            open-file dialog starting in 'D:/' asks the user for it.
        initDataCol: Number of tab-separated fields expected on every data
            line (default 5).

    Returns:
        A float array of shape (number of non-blank lines, initDataCol).

    Raises:
        IOError: If no file was chosen or the file cannot be opened.
        ValueError: If a data line does not hold exactly initDataCol fields
            or a field cannot be converted to float.
    """
    if filename is None:
        # tkFileDialog is the file-level Python 2 import; only needed when
        # the caller did not supply a path.
        filename = tkFileDialog.askopenfilename(initialdir='D:/')
    if not filename:
        # An empty result (e.g. the dialog was dismissed) cannot be opened;
        # fail with a clear message instead of an obscure open() error.
        raise IOError('no input file was selected')

    # Read the file once; blank lines (such as a trailing newline at the end
    # of the file) carry no data and are skipped.
    with open(filename, 'r') as fr:
        rows = [line.strip('\n').split('\t') for line in fr if line.strip()]

    dataMat = zeros((len(rows), initDataCol), dtype=float)
    for tempRow, fields in enumerate(rows):
        if len(fields) != initDataCol:
            # Without this check a one-field line would silently broadcast
            # across the whole row.
            raise ValueError(
                'data row %d has %d fields, expected %d'
                % (tempRow + 1, len(fields), initDataCol))
        # Assign exactly one row (the original sliced tempRow: and rewrote
        # every remaining row on each iteration).
        dataMat[tempRow, :] = [float(field) for field in fields]
    return dataMat
# Script entry point: load the matrix through the file dialog and show it.
if __name__ == '__main__':
    dataMat = txt2RealMat()
    print(dataMat)
(2)矩陣行數和列數均未知(都在讀取文本檔案時擷取)時:
from numpy import *
from FileDialog import *
import tkFileDialog
def txt2RealMat(filename=None):
    """Read a tab-separated text file into a float matrix.

    Both the number of rows and the number of columns are taken from the
    file contents.

    Args:
        filename: Path of the text file. When None (the default), an
            open-file dialog starting in 'D:/' asks the user for it.

    Returns:
        A numpy matrix of floats with one row per non-blank line. A file
        without any data lines yields an empty 0x0 matrix.

    Raises:
        IOError: If no file was chosen or the file cannot be opened.
        ValueError: If the data lines do not all have the same number of
            fields or a field cannot be converted to float.
    """
    if filename is None:
        # tkFileDialog is the file-level Python 2 import; only needed when
        # the caller did not supply a path.
        filename = tkFileDialog.askopenfilename(initialdir='D:/')
    if not filename:
        # An empty result (e.g. the dialog was dismissed) cannot be opened;
        # fail with a clear message instead of an obscure open() error.
        raise IOError('no input file was selected')

    data = []
    with open(filename, 'r') as fr:
        for line in fr:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            fields = line.strip('\n').split('\t')
            data.append([float(field) for field in fields])

    if not data:
        return asmatrix(zeros((0, 0), dtype=float))

    # Every row must match the width of the first one; the original took the
    # width from the last line only and mishandled ragged input.
    initDataCol = len(data[0])
    for rowIndex, row in enumerate(data):
        if len(row) != initDataCol:
            raise ValueError(
                'data row %d has %d fields, expected %d'
                % (rowIndex + 1, len(row), initDataCol))

    # asmatrix is what the `mat` alias pointed to; the alias itself is gone
    # in NumPy 2.0, while asmatrix exists in old and new releases.
    return asmatrix(data)
# Exercise the reader when run as a script: pick a file, print the matrix.
if __name__ == '__main__':
    dataMat = txt2RealMat()
    print(dataMat)