-
问题描述
编程实现K均值(K-means)算法,并对iris和sonar两个数据集进行聚类。
-
基本原理
-
实验结果
-
MATLAB核心代码展示
1、Kmeans函数
% Kmeans_.m
function [a,m] = Kmeans_(dataset,k,tol)
% KMEANS_  K-means clustering of a labelled data set, with accuracy and plots.
%
%   [a,m] = Kmeans_(dataset,k) clusters the rows of DATASET into K groups.
%   Every column except the last is used as a feature; the last column is
%   kept as the reference label and is only used for scoring and plotting.
%
%   [a,m] = Kmeans_(dataset,k,tol) stops iterating once the value returned
%   by Euclidean(m,old_m) is no longer greater than TOL (default 0.1).
%
%   Inputs:
%     dataset - n-by-(p+1) numeric matrix, label in the last column.
%     k       - number of clusters, an integer in 1..n.
%     tol     - (optional) convergence threshold on the centre shift.
%
%   Outputs:
%     a - fraction of samples whose reference label equals the label
%         given to their cluster. A cluster is given the reference label
%         of the sample that lies closest to its centre.
%     m - k-by-p matrix of cluster centres.
%
%   Side effects: opens two figures, both drawn on the same two randomly
%   chosen feature columns. The first shows the clustering result together
%   with the centres, the second shows the reference labels.
%
%   Relies on Euclidean and sort_label, which are defined elsewhere.
%   NOTE(review): sort_label is presumed to reorder rows by the value in
%   the last column, and Euclidean(m,old_m) is presumed to return a scalar
%   for matrix arguments - confirm against those helpers.

if nargin < 3 || isempty(tol)
    tol = 0.1;
end

[n,p] = size(dataset);
p = p - 1;                              % the last column is the label
if p < 1
    error('Kmeans_:noFeatures', ...
        'dataset needs at least one feature column plus a label column.');
end
if k < 1 || k > n || k ~= floor(k)
    error('Kmeans_:badK', ...
        'k must be an integer between 1 and the number of rows (%d).', n);
end

% Marker styles, cycled when k exceeds the number of base styles.
styles = ['r.';'b.';'g.';'m.'];
nStyles = size(styles,1);

% Pick two DISTINCT feature columns to plot (the same one twice only
% when there is a single feature).
featurePerm = randperm(p);
plotindex = featurePerm([1, min(2,p)]);

% Shuffle the rows so the first k rows form a random initial centre set.
dataset = dataset(randperm(n), :);
data = dataset(:,1:end-1);
m = data(1:k,:);

flag = 1;
while flag
    % Assignment step: nearest centre for every sample.
    labels = assign_to_nearest_(data, m);
    [dataset, clusterIdx] = sort_by_cluster_(dataset, labels);
    data = dataset(:,1:end-1);

    % Update step: each centre becomes the mean of its members.
    old_m = m;
    for j = 1:k
        members = data(clusterIdx == j, :);
        if ~isempty(members)
            % Average over rows explicitly: mean() of a single row would
            % otherwise collapse it to one scalar.
            m(j,:) = mean(members, 1);
        end
        % An empty cluster keeps its previous centre.
    end

    flag = Euclidean(m, old_m) > tol;
end

% Final assignment with the converged centres.
labels = assign_to_nearest_(data, m);
[dataset, clusterIdx] = sort_by_cluster_(dataset, labels);
data = dataset(:,1:end-1);
trueLabel = dataset(:,end);

% Score: label each cluster by the sample nearest to its centre, then
% count the members that share that label.
hits = zeros(1,n);
for j = 1:k
    d = zeros(1,n);
    for i = 1:n
        d(i) = Euclidean(data(i,:), m(j,:));
    end
    [~, nearest] = min(d);
    clusterLabel = trueLabel(nearest);
    members = (clusterIdx == j);
    hits(members) = (trueLabel(members) == clusterLabel);
end
a = mean(hits);

% Figure 1: clustering result, plus the centres on the same two features.
figure()
hold on
for j = 1:k
    members = (clusterIdx == j);
    plot(dataset(members,plotindex(1)), dataset(members,plotindex(2)), ...
        styles(mod(j-1,nStyles)+1,:))
end
plot(m(:,plotindex(1))', m(:,plotindex(2))', 'ko')
hold off

% Figure 2: reference labels. Only labels equal to 1..k are drawn.
figure()
hold on
for j = 1:k
    members = (trueLabel == j);
    plot(dataset(members,plotindex(1)), dataset(members,plotindex(2)), ...
        styles(mod(j-1,nStyles)+1,:))
end
hold off
end

function labels = assign_to_nearest_(data, m)
% Return, for each row of DATA, the index of the closest row of M.
n = size(data,1);
k = size(m,1);
labels = zeros(1,n);
for i = 1:n
    d = zeros(1,k);
    for j = 1:k
        d(j) = Euclidean(data(i,:), m(j,:));
    end
    [~, labels(i)] = min(d);
end
end

function [dataset, clusterIdx] = sort_by_cluster_(dataset, labels)
% Reorder DATASET with sort_label using the cluster index as a temporary
% last column, then split that column off again.
combined = sort_label([dataset, labels']);
dataset = combined(:,1:end-1);
clusterIdx = combined(:,end);
end
2、run.m
% run.m
% Driver script: clusters the iris and sonar data sets with Kmeans_ and
% prints the accuracy of each run.
clc,clear

% Load the sonar data.
% NOTE(review): sort_label is defined elsewhere; presumably it orders the
% rows by the label in the last column - confirm.
filename = 'sonar.csv';
dataset_sonar = csvread(filename);
dataset_sonar = sort_label(dataset_sonar);

% Load the iris data.
filename = 'iris.csv';
dataset_iris = csvread(filename);
dataset_iris = sort_label(dataset_iris);

% Keep the results of the two runs apart so the iris result is not
% overwritten by the sonar one.
[acc_iris, m_iris]   = Kmeans_(dataset_iris, 3);
[acc_sonar, m_sonar] = Kmeans_(dataset_sonar, 2);

fprintf('iris  (k=3): accuracy = %.4f\n', acc_iris);
fprintf('sonar (k=2): accuracy = %.4f\n', acc_sonar);

% Legacy names: as before, acc and m hold the result of the last (sonar) run.
acc = acc_sonar;
m = m_sonar;
3、图像分割
% K-means colour segmentation of 'bus.jpg'.
% Steps: 5x5 mean-filter denoising, conversion to L*a*b*, k-means on the
% pixel values, then painting every pixel with the palette colour of its
% cluster.
clear;
clc;

% Palette: one RGB row (0-255) per region.
colors=[255,0,0;255,255,0;0,0,255;0,255,0;255,0,255;0,255,255;160,32,240;255,128,0;169,169,169];
nColors = 5;   % number of regions to segment into
if nColors > size(colors,1)
    error('nColors (%d) exceeds the %d colours in the palette.', ...
        nColors, size(colors,1));
end

I_rgb = imread('bus.jpg');
[nrows,ncols,col] = size(I_rgb);
if col ~= 3
    error('Expected an RGB image with 3 channels, got %d.', col);
end

% Denoise each channel with a normalised 5x5 box kernel.
% The kernel is not called "filter" so the builtin of that name stays usable.
kernel = ones(5,5);
kernel = kernel/sum(kernel(:));
denoised_rgb = zeros(nrows,ncols,3);
for c = 1:3
    denoised_rgb(:,:,c) = conv2(double(I_rgb(:,:,c)), kernel, 'same');
end
D_rgb = uint8(denoised_rgb);   % round and saturate to 0~255
figure();imshow(D_rgb);title('去噪后图像');   % denoised result
imggray = rgb2gray(D_rgb);
figure();imshow(imggray);title('gray');

% Convert from RGB to the L*a*b* colour space.
% The DENOISED image is converted, so the filtering above actually feeds
% the clustering (previously the raw image was used and the denoised one
% was only displayed).
C = makecform('srgb2lab');
I_lab = applycform(D_rgb, C);
figure();imshow(I_lab);

% One row per pixel, all three L*a*b* channels as features.
ab = reshape(double(I_lab), nrows*ncols, 3);
[cluster_idx,cluster_center] = kmeans(ab,nColors,'distance','sqEuclidean','Replicates',2);
pixel_labels = reshape(cluster_idx,nrows,ncols);

% Paint every pixel with its cluster colour in one indexed lookup.
% cluster_idx is in column-major pixel order, so reshaping the N-by-3
% colour list to nrows-by-ncols-by-3 puts each channel in its own plane.
result = cast(reshape(colors(cluster_idx,:), nrows, ncols, 3), class(I_rgb));
figure(),imshow(result)