天天看點

matlab注釋分析高斯混合模型

源碼from:http://blog.csdn.net/abcjennifer/article/details/8198352#reply

Rachel-Zhang 提供的源碼。

高斯混合模型

沒有輸入參數判斷,沒有協方差是否可逆驗證。

我要用語音處理的,電腦卡當機,逆矩陣不是所有的都有的。

或者用文庫裡面的代碼

http://wenku.baidu.com/link?url=-bK88ETqMCURoMq1z1Lyiz4nzhpSmDq_J23o8yA0P1NVFM7GoFiH9UjC1b6R-jyYpimEuwYXebmU5WPd7Ek0UsnI_Zq6R5cjeMwmQVM5lWq

function varargout = gmm(X, K_or_centroids)
% ============================================================
 % Expectation-Maximization iteration implementation of
 % Gaussian Mixture Model.
 %
 % PX = GMM(X, K_OR_CENTROIDS)
 % [PX MODEL] = GMM(X, K_OR_CENTROIDS)
 %
 %  - X: N-by-D data matrix.----------NxD的矩陣
 %  - K_OR_CENTROIDS: either K indicating the number of--------單個數字K/[K] 或者 KxD矩陣的聚類
 %       components or a K-by-D matrix indicating the
 %       choosing of the initial K centroids.
 %
 %  - PX: N-by-K matrix indicating the probability of each--------NxK矩陣,第N個資料點占第K個單一高斯機率密度
 %       component generating each point.
 %  - MODEL: a structure containing the parameters for a GMM:
 %       MODEL.Miu: a K-by-D matrix.-------------KxD矩陣,初始化聚類樣本,後面循環為每個資料點機率歸一化後再聚類機率歸一化後的均值矩陣
 %       MODEL.Sigma: a D-by-D-by-K matrix.------DxDxK矩陣,初始化資料點對于聚類的方差[聚類等機率],後面循環為均值矩陣改變以後的方差
 %       MODEL.Pi: a 1-by-K vector.-----------1xK矩陣,初始化資料點使用聚類的機率分布,後面循環高斯混合機率系數歸一化的分母Nk/N,高斯混合權重系數向量
 % ============================================================
 
     threshold = 1e-15;%門檻值
     [N, D] = size(X);%矩陣X的行N,列D
  
     if isscalar(K_or_centroids)%判斷值是否為1x1矩陣即單個數字
         K = K_or_centroids;%取[k]的k
         % randomly pick centroids
        rndp = randperm(N);%傳回一行由1到N個的整數無序排列的向量
         centroids = X(rndp(1:K), :);%取出X矩陣行打亂後的矩陣的前K行
     else
        K = size(K_or_centroids, 1);%取矩陣K_or_centroids的行數
         centroids = K_or_centroids;%取矩陣K_or_centroids矩陣
     end
 
     % initial values
     [pMiu pPi pSigma] = init_params();
 %初始化 嵌套函數後面可見。KxD矩陣pMiu聚類采樣點,1*K向量pPi使用同一個聚類采樣點出現機率,D*D*K的矩陣pSigma矩陣X的列項j對于聚類采樣點的協方差
  
     Lprev = -inf; %inf表示正無究大,-inf表示為負無究大
     while true
         Px = calc_prob();%NxK矩陣Px存放聚類點k(共有聚類點K個)對于全部資料點的正态分布生成樣本的機率密度
  
         % new value for pGamma
        pGamma = Px .* repmat(pPi, N, 1);%NxK矩陣pGamma在使用聚類采樣點k的條件下某個資料點n生成樣本機率密度(條件機率密度)
         pGamma = pGamma ./ repmat(sum(pGamma, 2), 1, K); %NxK矩陣pGamma對于使用資料點條件機率密度行向歸一化。
      %求每個樣本由第K個聚類,也叫“component“生成的機率)
 
         % new value for parameters of each Component
         Nk = sum(pGamma, 1);%1xK矩陣Nk第k個聚類點被資料點使用的機率總和
         pMiu = diag(1./Nk) * pGamma' * X;
 %KxD矩陣 重新計算每個component的均值 維數變化KxK*KxN*NxD=KxD 資料點進行 聚類機率歸一化.條件機率密度.資料點=得到均值(期望)
 %均值=Σ機率Pi*資料點某一屬性Xi;而這裡還多了個聚類機率Nk
        pPi = Nk/N; %更新混合高斯的權重系數
        for kk = 1:K %重新計算每個component的協方差矩陣
             Xshift = X-repmat(pMiu(kk, :), N, 1);%NxD矩陣Xshift在某一個聚類點的情況下,每個屬性在這個聚類下的對均值(期望)差數(X-μi)
             pSigma(:, :, kk) = (Xshift' * ...
                 (diag(pGamma(:, kk)) * Xshift)) / Nk(kk);%DxD矩陣pSigma(::,i) 機率Pi  聚類機率=1/Nk(i)
                 %第i個方差矩陣= Σ(X-μi)轉置*機率Pi*(X-μi)*第i個聚類機率
         end
  
         % check for convergence
         L = sum(log(Px*pPi')); %求混合高斯分布的似然函數
        if L-Lprev < threshold %随着疊代次數的增加,似然函數越來越大,直至不變
             break; %似然函數收斂則退出
         end
        Lprev = L;
     end
 
     if nargout == 1 %如果傳回是一個參數的話,那麼varargout=Px;
         varargout = {Px};
     else %否則,傳回[Px model],其中model是結構體
        model = [];
         model.Miu = pMiu;
         model.Sigma = pSigma;
         model.Pi = pPi;
         varargout = {Px, model};
     end
 

     function [pMiu pPi pSigma] = init_params()
         pMiu = centroids;%得X矩陣中的任意K行,KxD矩陣  聚類點
         pPi = zeros(1, K);%擷取K維零向量[0 0 ...0]     權重系數(每行資料與聚類中點最小距離的機率)
         pSigma = zeros(D, D, K);%擷取K個DxD的零矩陣
  
         %distmat為D維距離差平方和
         % hard assign x to each centroids  %X有NxD;sum(X.*X, 2)為Nx1; repmat(sum(X.*X, 2), 1, K)行向整體1倍,列向整體K倍;結果NxK
        distmat = repmat(sum(X.*X, 2), 1, K) + ... %distmat第j行的第i個元素表示第j個資料與第i個聚類點的距離,如果資料有4個,聚類2個,那麼distmat就是4*2矩陣
            repmat(sum(pMiu.*pMiu, 2)', N, 1) - 2*X*pMiu'; %sum(A,2)結果為每行求和列向量,第i個元素是第i行的求和;
        [dummy labels] = min(distmat, [], 2); %傳回列向量dummy和labels,dummy向量記錄distmat的每行的最小值,labels向量記錄每行最小值的列号(多個取第一個),即是第幾個聚類,labels是N×1列向量,N為樣本數
 
         for k=1:K
             Xk = X(labels == k, :); %把标志為同一個聚類的樣本組合起來
            pPi(k) = size(Xk, 1)/N; %求混合高斯模型的權重系數,pPi為1*K的向量  
            pSigma(:, :, k) = cov(Xk); 
		%如果X為Nx1數組,那麼cov(Xk)求單個高斯模型的協方差矩陣
		%如果X為NxD(D>1)的矩陣,那麼cov(Xk)求聚類樣本的協方差矩陣
		%cov()求出的為方陣--《機率論與數理統計》-多元随機變量的數字特征,且是對稱矩陣(上三角和下三角對稱)--《線性代數》
		%pSigma為D*D*K的矩陣
        end
     end
  
     function Px = calc_prob()
         Px = zeros(N, K);%NxK零矩陣
         for k = 1:K
             Xshift = X-repmat(pMiu(k, :), N, 1); %NxD矩陣Xshift表示為對于一個1xD聚類點向量行向增N倍的樣本矩陣-Uk,第i行表示xi-uk
            inv_pSigma = inv(pSigma(:, :, k)); %求協方差矩陣的逆,pSigmaD*D*K的矩陣, inv_pSigmaD*D的矩陣,Σ^-1
             tmp = sum((Xshift*inv_pSigma) .* Xshift, 2); 
     	%tmp為N*1矩陣,第i行表示(xi-uk)^T*Sigma^-1*(xi-uk) 即-(x-μ)轉置x(Σ^-1).(x-μ)   ----矩陣有叉乘(矩陣乘)和點乘
            coef = (2*pi)^(-D/2) * sqrt(det(inv_pSigma));
		 %求多元正态分布中指數前面的系數,(2π)^(-d/2)*|(Σ^-(1/2))|
            Px(:, k) = coef * exp(-0.5*tmp); %NxK矩陣求單獨一個D維正态分布生成樣本的機率密度或貢獻
        end
     end
 end
%聚類  資料挖掘
%矩陣算法 線性代數
%機率密度,近似然  數理統計
           

被我稍微修改備注的

function  model = gmmEM(data,Kk,option)
%
% K 為model數分為幾個聚類
% Reference: PRML p438-439
tic 
if nargin < 3
    option.eps = 1e-12;%收斂設定
    option.maxiter = 1000;%
    
end
global num_data K;
K=Kk;
X = data';  %X為D*N型資料,跟PRML對樣本資料描述相反
[dim,num_data] = size(X);
%Initialize
%-------------------------------
%K = numel(unique(data.y));
[inx, C,~] = kmeans(data,K);%[inx, C,~] = kmeans(X',K);把對象分為K個聚類
mu = C';%聚類點均值轉置
%inx D個資料項的聚類編号
pai = zeros(1,K);%權重系數清零
E = zeros(dim,dim,K);

for k=1:K
    pai(k) = sum(inx==k); %inx==index  統一聚類的個數統計
    E(:,:,k) = eye(dim);%E機關矩陣 初始化協方差矩陣 
end 
pai = pai/num_data;%各個聚類的權重系數
iter = 0;%遊标、統計疊代次數
log_val = logGMM(X,mu,E,pai);
try
    iter
while   iter<option.maxiter     
    iter = iter+1; 
    % E step 
    Yz = compu_Yz(X,mu,E,pai);%聚類k歸一化後的多元條件正态機率 
    %前面使用了資料點使用k聚類歸一化(縱向歸一化)而後造成沒有橫向歸一化
    % M step 高斯疊代(橫向歸一化需要進行均值,協方差矩陣更新)
    NK = sum(Yz);%每個聚類被使用的比例總和存放1Xk, 用于後面的橫向歸一化
    for k = 1:K
         mu(:,k) = 1/NK(k)*X*Yz(:,k);  %均值矩陣更新 DXN NXK
        %均值=Σ機率Pi*資料點某一屬性Xi;而這裡還多了個聚類機率Nk  
        Ek = zeros(dim,dim);%協方差矩陣清零
        %n=1:num_data;
        for n=1:num_data
            Ek = Ek+Yz(n,k)*(X(:,n)-mu(:,k))*(X(:,n)-mu(:,k))';
        end
        E(:,:,k) = 1/NK(k)*Ek; 
        
         %第i個方差矩陣= Σ(X-μi)轉置*機率Pi*(X-μi)*第i個聚類機率       
    end
    pai = NK/num_data;
    %檢查是否收斂  
    log_val_new = logGMM(X,mu,E,pai);
    if abs(log_val_new-log_val) < option.eps        
        model.Yz = Yz;
        model.mu = mu;
        model.E = E;
        model.iter = iter;
       return;
    end
    log_val = log_val_new;
    if mod(iter,10)==0
        disp(['-----進行第' num2str(iter) '疊代...'])
    end
end
catch
    fprintf('出錯!!\n已經疊代次數:%d\n最大疊代次數\n',iter,option.maxiter);
    model.Yz = Yz;
    model.mu = mu;
    model.E = E;
    model.iter = iter;
    %return;
end    
if iter==option.maxiter
    fprintf('達到最大疊代次數%d',maxiter)
    model.Yz = Yz;
    model.mu = mu;
    model.E = E;
    model.iter = iter;
end
toc
%model.usedTime = toc-tic;
end

%------------------------------------
function val = logGMM(X,mu,E,pai)
%樣本資料X(行資料項,列屬性)。均值矩陣。協方差矩陣(方陣,對角對稱)。權重系數
global num_data K
val = 0;
%N = size(X,2);
%K = size(mu,2);
for n=1:num_data
    tmp = 0;
    for k=1:K        
        p = mvnpdf(X(:,n),mu(:,k),E(:,:,k));
        tmp = tmp+pai(k)*p;
    end
    val = val + log(tmp);
end
end

%---------------------------------------------------------------
function Yz = compu_Yz(X,mu,E,pai)
%樣本資料X(行資料項,列屬性)。均值矩陣。協方差矩陣。權重系數
    global num_data K
    Yz = zeros(num_data,K);%清零
    for n = 1:num_data
        Y_nK = zeros(1,K);%清零 第k個聚類的
        for k=1:K
            Y_nK(k) = pai(k)*mvnpdf(X(:,n),mu(:,k),E(:,:,k));
            %庫内mvnpdf 傳回DX1數組Y_nk   (DX1;DX1;DXD)
            %mvnpdf 單獨一個N維正态分布生成樣本的機率密度(離散此處值當作密度)
            %pai(k)*正态機率密度=條件機率密度(在使用某個聚類的前提下)
        end
        Yz(n,:) = Y_nK/sum(Y_nK);%機率歸一化 (一個資料使用k個聚類的比例)
        %求第n個資料分别在聚類中的單獨一個N維正态分布生成樣本的機率(密度)
    end
end
           

matlab自帶mvnpdf的源代碼

function y = mvnpdf(X, Mu, Sigma)
%MVNPDF Multivariate normal probability density function (pdf).
%   Y = MVNPDF(X) returns the probability density of the multivariate normal
%   distribution with zero mean and identity covariance matrix, evaluated at
%   each row of X.  Rows of the N-by-D matrix X correspond to observations or
%   points, and columns correspond to variables or coordinates.  Y is an
%   N-by-1 vector.
%
%   Y = MVNPDF(X,MU) returns the density of the multivariate normal
%   distribution with mean MU and identity covariance matrix, evaluated
%   at each row of X.  MU is a 1-by-D vector, or an N-by-D matrix, in which
%   case the density is evaluated for each row of X with the corresponding
%   row of MU.  MU can also be a scalar value, which MVNPDF replicates to
%   match the size of X.
%
%   Y = MVNPDF(X,MU,SIGMA) returns the density of the multivariate normal
%   distribution with mean MU and covariance SIGMA, evaluated at each row
%   of X.  SIGMA is a D-by-D matrix, or an D-by-D-by-N array, in which case
%   the density is evaluated for each row of X with the corresponding page
%   of SIGMA, i.e., MVNPDF computes Y(I) using X(I,:) and SIGMA(:,:,I).
%   If the covariance matrix is diagonal, containing variances along the 
%   diagonal and zero covariances off the diagonal, SIGMA may also be
%   specified as a 1-by-D matrix or a 1-by-D-by-N array, containing 
%   just the diagonal. Pass in the empty matrix for MU to use its default 
%   value when you want to only specify SIGMA.
%
%   If X is a 1-by-D vector, MVNPDF replicates it to match the leading
%   dimension of MU or the trailing dimension of SIGMA.
%
%   Example:
%
%      mu = [1 -1]; Sigma = [.9 .4; .4 .3];
%      [X1,X2] = meshgrid(linspace(-1,3,25)', linspace(-3,1,25)');
%      X = [X1(:) X2(:)];
%      p = mvnpdf(X, mu, Sigma);
%      surf(X1,X2,reshape(p,25,25));
%
%   See also MVTPDF, MVNCDF, MVNRND, NORMPDF.

%   Copyright 1993-2011 The MathWorks, Inc.


if nargin<1
    error(message('stats:mvnpdf:TooFewInputs'));
elseif ndims(X)~=2
    error(message('stats:mvnpdf:InvalidData'));
end

% Get size of data.  Column vectors provisionally interpreted as multiple scalar data.
[n,d] = size(X);
if d<1
    error(message('stats:mvnpdf:TooFewDimensions'));
end

% Assume zero mean, data are already centered
if nargin < 2 || isempty(Mu)
    X0 = X;

% Get scalar mean, and use it to center data
elseif numel(Mu) == 1
    X0 = X - Mu;

% Get vector mean, and use it to center data
elseif ndims(Mu) == 2
    [n2,d2] = size(Mu);
    if d2 ~= d % has to have same number of coords as X
        error(message('stats:mvnpdf:ColSizeMismatch'));
    elseif n2 == n % lengths match
        X0 = X - Mu;
    elseif n2 == 1 % mean is a single row, rep it out to match data
        X0 = bsxfun(@minus,X,Mu);
    elseif n == 1 % data is a single row, rep it out to match mean
        n = n2;
        X0 = bsxfun(@minus,X,Mu);  
    else % sizes don't match
        error(message('stats:mvnpdf:RowSizeMismatch'));
    end
    
else
    error(message('stats:mvnpdf:BadMu'));
end

% Assume identity covariance, data are already standardized
if nargin < 3 || isempty(Sigma)
    % Special case: if Sigma isn't supplied, then interpret X
    % and Mu as row vectors if they were both column vectors
    if (d == 1) && (numel(X) > 1)
        X0 = X0';
        d = size(X0,2);
    end
    xRinv = X0;
    logSqrtDetSigma = 0;
    
% Single covariance matrix
elseif ndims(Sigma) == 2
    sz = size(Sigma);
    if sz(1)==1 && sz(2)>1
        % Just the diagonal of Sigma has been passed in.
        sz(1) = sz(2);
        sigmaIsDiag = true;
    else
        sigmaIsDiag = false;
    end
    
    % Special case: if Sigma is supplied, then use it to try to interpret
    % X and Mu as row vectors if they were both column vectors.
    if (d == 1) && (numel(X) > 1) && (sz(1) == n)
        X0 = X0';
        d = size(X0,2);
    end
    
    %Check that sigma is the right size
    if sz(1) ~= sz(2)
        error(message('stats:mvnpdf:BadCovariance'));
    elseif ~isequal(sz, [d d])
        error(message('stats:mvnpdf:CovSizeMismatch'));
    else
        if sigmaIsDiag
            if any(Sigma<=0)
                error(message('stats:mvnpdf:BadDiagSigma'));
            end
            R = sqrt(Sigma);
            xRinv = bsxfun(@rdivide,X0,R);
            logSqrtDetSigma = sum(log(R));
        else
            % Make sure Sigma is a valid covariance matrix
            [R,err] = cholcov(Sigma,0);
            if err ~= 0
                error(message('stats:mvnpdf:BadMatrixSigma'));
            end
            % Create array of standardized data, and compute log(sqrt(det(Sigma)))
            xRinv = X0 / R;
            logSqrtDetSigma = sum(log(diag(R)));
        end
    end
    
% Multiple covariance matrices
elseif ndims(Sigma) == 3
    
    sz = size(Sigma);
    if sz(1)==1 && sz(2)>1
        % Just the diagonal of Sigma has been passed in.
        sz(1) = sz(2);
        Sigma = reshape(Sigma,sz(2),sz(3))';
        sigmaIsDiag = true;
    else
        sigmaIsDiag = false;
    end

    % Special case: if Sigma is supplied, then use it to try to interpret
    % X and Mu as row vectors if they were both column vectors.
    if (d == 1) && (numel(X) > 1) && (sz(1) == n)
        X0 = X0';
        [n,d] = size(X0);
    end
    
    % Data and mean are a single row, rep them out to match covariance
    if n == 1 % already know size(Sigma,3) > 1
        n = sz(3);
        X0 = repmat(X0,n,1); % rep centered data out to match cov
    end

    % Make sure Sigma is the right size
    if sz(1) ~= sz(2)
        error(message('stats:mvnpdf:BadCovarianceMultiple'));
    elseif (sz(1) ~= d) || (sz(2) ~= d) % Sigma is a stack of dxd matrices
        error(message('stats:mvnpdf:CovSizeMismatchMultiple'));
    elseif sz(3) ~= n
        error(message('stats:mvnpdf:CovSizeMismatchPages'));
    else
        if sigmaIsDiag
            if any(any(Sigma<=0))
                error(message('stats:mvnpdf:BadDiagSigma'));
            end
            R = sqrt(Sigma);
            xRinv = X0./R;
            logSqrtDetSigma = sum(log(R),2);
        else
            % Create array of standardized data, and vector of log(sqrt(det(Sigma)))
            xRinv = zeros(n,d,superiorfloat(X0,Sigma));
            logSqrtDetSigma = zeros(n,1,class(Sigma));
            for i = 1:n
                % Make sure Sigma is a valid covariance matrix
                [R,err] = cholcov(Sigma(:,:,i),0);
                if err ~= 0
                    error(message('stats:mvnpdf:BadMatrixSigmaMultiple'));
                end
                xRinv(i,:) = X0(i,:) / R;
                logSqrtDetSigma(i) = sum(log(diag(R)));
            end
        end
    end
   
elseif ndims(Sigma) > 3
    error(message('stats:mvnpdf:BadCovariance'));
end

% The quadratic form is the inner products of the standardized data
quadform = sum(xRinv.^2, 2);

y = exp(-0.5*quadform - logSqrtDetSigma - d*log(2*pi)/2);
           

繼續閱讀