天天看點

Python2.7.5:讀取文本檔案中的資料到實型矩陣

假設文本檔案中儲存的是形如矩陣的資料,但由於文本檔案的格式所限,打開檔案後按行讀取到的類型是字元串,不方便後續按數值處理,所以需要進行類型轉換。參考網上資料並在 Python 2.7.5 中調試後,總結為以下兩種方式:

(1)已知矩陣列數但行數未知(讀取文本檔案時擷取行數)時:

from numpy import *

from FileDialog import *

import tkFileDialog

def txt2RealMat(filename=None, initDataCol=5):
    """Read a tab-separated text file into a 2-D float array.

    The number of columns is known in advance; the number of rows is
    taken from the file itself.

    Args:
        filename: Path of the text file.  When omitted (the original
            calling convention) an "open file" dialog starting in
            ``D:/`` is shown and the selected file is used.
        initDataCol: Expected number of tab-separated values per line.
            Defaults to 5.

    Returns:
        A numpy float array of shape ``(rows, initDataCol)``.  An empty
        file yields an array with zero rows.

    Raises:
        IOError: If no file was selected or the file cannot be opened.
        ValueError: If a line does not hold exactly ``initDataCol``
            fields, or a field is not a valid number.
    """
    if filename is None:
        filename = tkFileDialog.askopenfilename(initialdir='D:/')
    if not filename:
        # A cancelled dialog yields an empty value; fail with a clear
        # message instead of letting open('') raise a confusing error.
        raise IOError('no input file was selected')

    rows = []
    with open(filename, 'r') as fr:
        # Single pass over the file: parse and validate each line once.
        for lineNumber, line in enumerate(fr, 1):
            if not line.strip():
                # Blank lines (typically a trailing newline at the end
                # of the file) carry no data.
                continue
            fields = line.strip('\r\n').split('\t')
            if len(fields) != initDataCol:
                # Without this check a one-field line would silently be
                # broadcast over the whole row.
                raise ValueError('line %d has %d column(s), expected %d'
                                 % (lineNumber, len(fields), initDataCol))
            rows.append([float(field) for field in fields])

    dataMat = zeros((len(rows), initDataCol), dtype=float)
    for rowIndex, values in enumerate(rows):
        # Fill exactly one row; the slice ``dataMat[rowIndex:]`` would
        # rewrite every remaining row on each iteration.
        dataMat[rowIndex, :] = values
    return dataMat

# Script entry point: pick a file through the dialog, convert it and
# show the resulting matrix.
if __name__ == "__main__":
    dataMat = txt2RealMat()
    # Parenthesised form prints the same text on Python 2 and also
    # parses on Python 3.
    print(dataMat)

(2)矩陣行數和列數均未知(都在讀取文本檔案時擷取)時:

from numpy import *

from FileDialog import *

import tkFileDialog

def txt2RealMat(filename=None):
    """Read a tab-separated text file into a real-valued numpy matrix.

    Neither the number of rows nor the number of columns has to be
    known in advance; both are taken from the file contents.

    Args:
        filename: Path of the text file.  When omitted (the original
            calling convention) an "open file" dialog starting in
            ``D:/`` is shown and the selected file is used.

    Returns:
        A numpy ``matrix`` of floats with one row per non-blank line.
        A file without data yields an empty matrix.

    Raises:
        IOError: If no file was selected or the file cannot be opened.
        ValueError: If the lines do not all hold the same number of
            fields, or a field is not a valid number.
    """
    if filename is None:
        filename = tkFileDialog.askopenfilename(initialdir='D:/')
    if not filename:
        # A cancelled dialog yields an empty value; fail with a clear
        # message instead of letting open('') raise a confusing error.
        raise IOError('no input file was selected')

    data = []
    with open(filename, 'r') as fr:
        # Single pass: split, validate and convert each line as it is
        # read instead of re-reading the file and converting in place.
        for lineNumber, line in enumerate(fr, 1):
            if not line.strip():
                # Blank lines (typically a trailing newline at the end
                # of the file) carry no data.
                continue
            fields = line.strip('\r\n').split('\t')
            if data and len(fields) != len(data[0]):
                # Every row must match the first one; relying on the
                # last line's width alone would leave unconverted
                # strings or index past short rows.
                raise ValueError('line %d has %d column(s), expected %d'
                                 % (lineNumber, len(fields), len(data[0])))
            data.append([float(field) for field in fields])

    # asmatrix is the function behind the old ``mat`` alias, which newer
    # numpy releases no longer provide.
    return asmatrix(data)

# Manual check: run the converter through the file dialog and show the
# matrix it produced.
if __name__ == "__main__":
    dataMat = txt2RealMat()
    # Parenthesised form prints the same text on Python 2 and also
    # parses on Python 3.
    print(dataMat)