天天看點

python軟體占多少記憶體_Python程式占用了太多記憶體

函數coinT()使用ADF檢驗和Hurst指數檢驗兩個時間序列是否平穩。時間序列存儲在1511x6 CSV檔案中,但是對于測試,函數stock()隻傳回第5列的向量。總共有50個檔案。程式似乎占用了太多記憶體,因為它會使電腦在運作約30秒後崩潰。它可以很好地處理15個檔案,但在較大的檔案集(大于50個)時崩潰。

有人能幫我找出是什麼東西占用了這麼多記憶體嗎?我嘗試過将計算拆分為多個函數并删除對象,但沒有太大幫助。

import numpy as np

import pandas as pd

import statsmodels.tsa.stattools as ts

import csv

import timeit

from numpy import log, polyfit, sqrt, std, subtract

from pandas.stats.api import ols

import os

# Directory that holds the stock CSV files; stock() opens `src + filename`.
src = 'C:/Users/PC/Desktop/Magistr/Ibpython/testing/'

# os.walk() yields (dirpath, dirnames, filenames) tuples, starting with `src`
# itself, so element [2] of the first tuple is the list of file names that
# sit directly inside `src` (sub-directories are not descended into here).
filenames = next(os.walk(src))[2] # load all stock file names into a list

# Result accumulator: coinTest() appends an (itemY, itemX) tuple for every
# pair that passes both of its tests.
cointegratedPairs = []

def hurst(ts, max_lag=100):
    """Estimate the Hurst exponent of the time series vector ts.

    H < 0.5 - the time series is mean reverting
    H = 0.5 - the time series is a Geometric Brownian Motion
    H > 0.5 - the time series is trending

    Args:
        ts: One-dimensional sequence of numbers (list, NumPy array or
            pandas Series). Inside this function the name shadows the
            module-level ``ts`` alias; it is kept so that existing
            keyword callers keep working.
        max_lag: Exclusive upper bound of the lags used for the fit, i.e.
            lags ``2 .. max_lag - 1`` are evaluated. The series has to be
            longer than the largest lag.

    Returns:
        Twice the slope of a straight-line fit of ``log(tau)`` against
        ``log(lag)``, where ``tau`` is the square root of the standard
        deviation of the lagged differences.
    """
    # Work on a plain float array so that the two slices below are
    # subtracted position by position. When a pandas Series is handed to a
    # NumPy ufunc, pandas may align both operands on their index labels
    # first, which turns every lagged difference into 0 or NaN.
    values = np.asarray(ts, dtype=float)

    # Range of lag values used for the regression.
    lags = np.arange(2, max_lag)

    # Square root of the standard deviation of the lagged differences.
    tau = [sqrt(std(values[lag:] - values[:-lag])) for lag in lags]

    # The slope of the log-log fit is half the Hurst exponent.
    slope = polyfit(log(lags), log(tau), 1)[0]
    return slope * 2.0

#Convert file into an array

def stock(filename, column=5):
    """Read one field of every row of a CSV file into a NumPy array.

    Args:
        filename: Name of the CSV file; it is appended to the module-level
            ``src`` directory string to build the path.
        column: Zero-based index of the field to extract from each row.
            Defaults to 5, the field the callers in this file rely on.

    Returns:
        One-dimensional NumPy array with the selected field of every row,
        in file order. The values are the raw text read from the file; no
        numeric conversion happens here.

    Raises:
        IndexError: If a row has fewer than ``column + 1`` fields.
    """
    with open(src + filename, 'r') as dest_f:
        rows = csv.reader(dest_f, delimiter=",", quotechar='"')
        # Keep only the requested field of each row instead of building a
        # list of full rows and a 2-D string array that is sliced afterwards;
        # this keeps the peak memory per file to a single column.
        values = [row[column] for row in rows]

    return np.asarray(values)

#Check if two time series are cointegrated

def coinTest(itemX, itemY):
    """Check whether two stock series are cointegrated and record the pair.

    The series read from ``itemY`` is regressed on the series read from
    ``itemX`` with a least-squares straight-line fit (slope = hedge ratio,
    plus intercept). The pair is appended to the module-level
    ``cointegratedPairs`` list as ``(itemY, itemX)`` when both hold:

    * the ADF test statistic of the regression residuals is below the
      1% critical value, and
    * the Hurst exponent of the residuals is below 0.4.

    Args:
        itemX: File name of the independent series (loaded with stock()).
        itemY: File name of the dependent series (loaded with stock()).

    Returns:
        None. The result is the side effect on ``cointegratedPairs``.

    Raises:
        ValueError: If a value cannot be converted to float, or if the two
            series differ in length after truncation.
    """
    # Only the first 1000 rows are used (2009.05.22 - 2013.05.14 according
    # to the original author's note). Converting to a float array up front
    # avoids the lazy ``map`` object that Python 3 would produce.
    indVar = np.asarray(stock(itemX)[0:1000], dtype=float)
    depVar = np.asarray(stock(itemY)[0:1000], dtype=float)

    if indVar.shape != depVar.shape:
        raise ValueError(
            "series lengths differ: %s has %d values, %s has %d"
            % (itemX, indVar.size, itemY, depVar.size)
        )

    # Calculate the optimal hedge ratio "beta" and the intercept "alpha".
    # A degree-1 polyfit is the same least-squares fit with intercept that
    # the removed ``pandas.stats.api.ols`` helper performed, so this
    # function no longer depends on that API.
    beta_hr, alpha = polyfit(indVar, depVar, 1)
    residuals = depVar - beta_hr * indVar - alpha

    # Calculate the CADF test on the residuals.
    cadf = ts.adfuller(residuals)
    test_statistic = cadf[0]
    critical_1pct = cadf[4]['1%']

    # Reject the unit-root null hypothesis at the 1% level, then require the
    # residuals to be mean reverting according to the Hurst exponent. The
    # Hurst estimate is only computed when the ADF test already passed.
    if test_statistic < critical_1pct and hurst(residuals) < 0.4:
        cointegratedPairs.append((itemY, itemX))

#Main function

def coinT():
    """Run coinTest() once for every unordered pair of distinct files.

    Iterates over the module-level ``filenames`` list; for the file at
    position ``i`` only the files after it are paired with it, so each pair
    is tested once, with the earlier file passed as ``itemX`` and the later
    one as ``itemY``. Results are collected by coinTest() itself.

    Returns:
        None.
    """
    for position, itemX in enumerate(filenames):
        # Slicing past the current position replaces the manual ``limit``
        # counter and skips the file's pairing with itself.
        for itemY in filenames[position + 1:]:
            if itemX == itemY:
                # Same name again: nothing to compare. (The original used a
                # bare ``next`` here, which is a no-op expression.)
                continue
            coinTest(itemX, itemY)