天天看點

ID3算法 C++實作

思想:

用 DFS 遞迴建樹:每一層都以上一層的資料表為基礎,篩選出目前分支對應的子表,並判斷該分支是否已到底層(即子表中所有樣本的類別相同,可直接作為葉節點輸出)。

(代碼有點辣雞qwq)

#include <bits/stdc++.h>

using namespace std;

// Pair-of-ints alias; nothing in this listing refers to it.
typedef pair<int,int> P;

// Large sentinel used as the starting value when searching for a minimum.
const int INF = 0x3f3f3f3f;
// Leftover constants; nothing in this listing refers to them.
const int N = 25;
const int mod = 32767;

// One indentation unit used when printing the tree.
string sep="     ";

// Column-major data tables: index 1..4 hold the attribute columns and
// index 5 holds the class label ("yes"/"no"); index 0 is unused.
vector<string> now[6];   // full training set as read from input
vector<string> tmp[6];   // scratch table for the rows of the current branch
vector<string> vec[6];   // spare table available as extra scratch space

// type[k] lists the possible values of column k (filled in by init()).
vector<string> type[6];

// vis[k] == 1 marks attribute k as already used on the current tree path.
int vis[6];

// Printable attribute names, indexed like the columns (slot 0 is a blank).
string name[6] = {" ", "weather", "temperature", "humidity", "windy"};

// Entropy, in bits, of a two-class sample containing `a` items of one class
// and `b` items of the other.
// Returns 0 when the total equals either count (one side contributes
// nothing), which also keeps log(0) out of the formula.
double cal(double a,double b){
    const double total=a+b;
    if(total==a||total==b){
        return 0;
    }
    const double shareA=a/total;
    const double shareB=b/total;
    // log base 2 expressed through natural logarithms
    const double bitsA=log(shareA)/log(2);
    const double bitsB=log(shareB)/log(2);
    return -shareA*bitsA-shareB*bitsB;
}

// Expected entropy of the class label after splitting table `now` on
// attribute column `k`.
// For every known value of the attribute (type[k]) the rows carrying that
// value are counted, together with how many of them are labelled "no" in
// column 5; each value contributes its row share times the entropy of its
// label split. A smaller result means a larger information gain.
double Info(vector<string>now[6],int k){
    const double rowCount=now[k].size();
    double expected=0;
    for(size_t v=0;v<type[k].size();++v){   // one pass per attribute value
        const string &value=type[k][v];
        int matched=0;
        int negatives=0;
        for(size_t row=0;row<now[k].size();++row){
            if(now[k][row]!=value)continue;
            ++matched;
            if(now[5][row]=="no")++negatives;
        }
        expected+=(matched/rowCount)*cal(matched-negatives,negatives);
    }
    return expected;
}

// Appends the possible values of every column to the global `type` table:
// columns 1..4 are the attributes, column 5 is the class label.
// Values are appended, so calling this more than once duplicates them.
void init(){
    struct Entry{
        int column;
        const char *value;
    };
    static const Entry table[]={
        {1,"sunny"},{1,"overcast"},{1,"rainy"},
        {2,"hot"},{2,"mild"},{2,"cool"},
        {3,"high"},{3,"normal"},
        {4,"weak"},{4,"strong"},
        {5,"yes"},{5,"no"}
    };
    const int entryCount=sizeof(table)/sizeof(table[0]);
    for(int e=0;e<entryCount;++e){
        type[table[e].column].push_back(table[e].value);
    }
}

// Rebuilds `tmp` as the subset of table `now` whose column `k` equals `s`.
// Columns 1..5 of `tmp` are emptied first, then every matching row is
// copied across column by column, keeping the original row order.
void build(vector<string>now[6],vector<string>tmp[6],int k,string s){
    for(int col=1;col<=5;++col){
        tmp[col].clear();
    }
    const vector<string> &keyColumn=now[k];
    for(size_t row=0;row<keyColumn.size();++row){
        if(keyColumn[row]!=s)continue;   // only rows with the requested value are kept
        for(int col=1;col<=5;++col){
            tmp[col].push_back(now[col][row]);
        }
    }
}


// Checks whether every row of `tmp` whose column `u` equals `s` carries the
// same class label (column 5).
// Returns 1 when all such rows are labelled "yes", 0 when none of them is,
// and -1 when the labels are mixed. With no matching row at all the first
// test succeeds, so the result is 1.
int judge(vector<string>tmp[6],int u,string s){
    int matched=0;
    int yesCount=0;
    int otherCount=0;
    for(size_t row=0;row<tmp[u].size();++row){
        if(tmp[u][row]!=s)continue;
        ++matched;
        if(tmp[5][row]=="yes"){
            ++yesCount;
        }else{
            ++otherCount;
        }
    }

    if(yesCount==matched)return 1;
    if(otherCount==matched)return 0;
    return -1;
}

// Prints the decision subtree rooted at attribute `u`.
//
//   u    index (1..4) of the attribute tested at this node
//   dep  indentation depth, in units of `sep`, for the attribute name;
//        its values are printed one unit deeper
//   now  table holding the rows that reach this node (read only here)
//   tmp  scratch table, overwritten with the rows of each branch in turn
//
// For every value of the attribute the matching rows are extracted. A branch
// whose rows all share one label becomes a leaf ("yes"/"no"); otherwise the
// unused attribute with the smallest expected entropy (largest information
// gain) is chosen and the branch is expanded recursively.
//
// Each recursion level gets its own scratch table. Sharing one global table
// between levels made `now` and `tmp` the same array from the third level
// on, so build() wiped its own input and every deeper branch printed "yes".
//
// NOTE(review): judge() returns 1 when no row has the value, so a value
// that does not occur in `now` is printed as a "yes" leaf.
void ID3(int u,int dep,vector<string>now[6],vector<string>tmp[6]){
    for(int pad=0;pad<dep;pad++)cout<<sep;
    cout<<name[u]<<endl;

    for(size_t v=0;v<type[u].size();v++){
        const string &value=type[u][v];
        build(now,tmp,u,value);          // rows of this branch
        int flag=judge(tmp,u,value);     // 1: all yes, 0: all no, -1: mixed
        for(int pad=0;pad<=dep;pad++)cout<<sep;
        cout<<value;
        if(flag!=-1){
            cout<<(flag==1?"  yes":"  no")<<endl;
            continue;
        }

        // Pick the unused attribute with the smallest expected entropy.
        int ans=-1;
        double mi=INF;
        for(int j=1;j<5;j++){
            if(vis[j]==1)continue;       // already used on this path
            double c=Info(tmp,j);
            if(c<mi){
                mi=c;
                ans=j;
            }
        }

        if(ans==-1){
            // Every attribute is already used but the labels still disagree
            // (contradictory samples): emit a majority-vote leaf, ties going
            // to "yes", instead of indexing vis[] with an unset value.
            int yesCount=0;
            for(size_t row=0;row<tmp[5].size();row++){
                if(tmp[5][row]=="yes")yesCount++;
            }
            int noCount=(int)tmp[5].size()-yesCount;
            cout<<(yesCount>=noCount?"  yes":"  no")<<endl;
            continue;
        }

        cout<<endl;
        vector<string> child[6];         // private scratch for the next level
        vis[ans]=1;
        ID3(ans,dep+2,tmp,child);
        vis[ans]=0;
    }
}

// Reads the training set from standard input and prints the ID3 tree.
//
// Input: a row count t, followed by t rows of five whitespace-separated
// fields (four attribute values, then the class label).
// Returns 0 on success and 1 when the input is missing or incomplete;
// previously such input left the root attribute uninitialised before it was
// used as an array index.
int main(){
    int t;
    if(!(cin>>t)||t<=0){
        cerr<<"expected a positive number of training rows"<<endl;
        return 1;
    }
    init();
    string s;
    for(int i=1;i<=t;i++){
        for(int j=1;j<=5;j++){
            if(!(cin>>s)){
                cerr<<"row "<<i<<" is incomplete: expected 5 fields"<<endl;
                return 1;
            }
            now[j].push_back(s);
            tmp[j].push_back(s);
        }
    }

    // Root attribute: the one with the smallest expected entropy, which is
    // the one with the largest information gain.
    int ans=-1;
    double mi=INF;
    for(int i=1;i<5;i++){
        double c=Info(tmp,i);
        if(c<mi){
            mi=c;
            ans=i;
        }
    }
    if(ans==-1){
        // Defensive: never index vis[] without a chosen attribute.
        cerr<<"could not select a root attribute"<<endl;
        return 1;
    }
    vis[ans]=1;
    cout<<"The decision tree is :"<<endl;
    ID3(ans,0,now,tmp);
    return 0;
}
/*
14
sunny hot high weak no
sunny hot high strong no
overcast hot high weak yes
rainy mild high weak yes
rainy cool normal weak yes
rainy cool normal strong no
overcast cool normal strong yes
sunny mild high weak no
sunny cool normal weak yes
rainy mild normal weak yes
sunny mild normal strong yes
overcast mild high strong yes
overcast hot normal weak yes
rainy mild high strong no
*/
           

運作結果:

ID3算法 C++實作