天天看点

计算信息熵的简单做法

信息熵的简单做法

信息熵的定义为 $H = -\sum_i p_i \log_2 p_i$,其中 log 是以 2 为底的对数。在C语言中,标准库的 `log` 函数是以 e 为底的自然对数,因此计算信息熵时可以应用换底公式 $\log_2 p = \ln p / \ln 2$ 来计算上面的公式。

直接贴代码了:

#include <map>
#include <cmath>
#include <iostream>
#include <string>
#include <stdio.h>

/**
 * Sums the counts stored in a key -> count map.
 *
 * @param smap map from a string key to its integer count; it is only read.
 * @return the sum of all mapped counts, or 0 when the map is empty.
 */
int GetSumCnt(const std::map<std::string, int> &smap)
{
    // The accumulator must start at 0; the initializer literal was missing,
    // which left the declaration ill-formed.
    int cnt_sum = 0;
    std::map<std::string, int>::const_iterator iter;
    for(iter = smap.begin(); iter != smap.end(); ++ iter)
        cnt_sum += iter->second;
    return cnt_sum;
}

/**
 * Adds `cnt` occurrences of `str` to `smap`, then prints the probability of
 * every key and the Shannon entropy (in bits) of the updated distribution.
 *
 * @param smap key -> count map; updated in place.
 * @param str  key whose count is increased.
 * @param cnt  amount added to the count of `str`.
 */
static void AddAndPrintEntropy(std::map<std::string, int> &smap,
                               const std::string &str, int cnt)
{
    // Change of base: log2(p) = ln(p) / ln(2). The divisor is loop-invariant.
    const double ln2 = std::log(2.0);

    smap[str] += cnt;
    const int cnt_sum = GetSumCnt(smap);

    float score = 0;
    // A zero total would make every probability a division by zero.
    if(cnt_sum != 0)
    {
        std::map<std::string, int>::const_iterator iter;
        for(iter = smap.begin(); iter != smap.end(); ++ iter)
        {
            const float p_tmp = static_cast<float>(iter->second) / static_cast<float>(cnt_sum);
            printf("The p_tmp is %f\n", p_tmp);
            // A key with p == 0 contributes nothing to the entropy; skipping
            // it avoids 0 * log(0), which evaluates to NaN.
            if(p_tmp > 0)
                score -= p_tmp * (std::log(static_cast<double>(p_tmp)) / ln2);
        }
    }
    printf("The score is %f\n", score);
}

/**
 * Demo: grows a small key -> count map step by step and prints the entropy
 * of the distribution after each update.
 */
int main()
{
    std::map<std::string, int> smap;

    // NOTE(review): the original count literals were lost from the listing;
    // 1 is used for each update as an illustrative value - confirm against
    // the intended data.
    AddAndPrintEntropy(smap, "baidu", 1);
    AddAndPrintEntropy(smap, "beijing", 1);
    AddAndPrintEntropy(smap, "beijing", 1);
    return 0;
}
           

在此,主要想记录的是换底公式,在写代码的时候,可以让代码更加方便简洁一些。

继续阅读