天天看点

使用weka的select attribute

原文:http://blog.csdn.net/anqiang1984/article/details/4048177

package com.csdn;

import java.io.File;

import weka.attributeSelection.InfoGainAttributeEval;

import weka.attributeSelection.Ranker;

import weka.classifiers.Classifier;

import weka.core.Instances;

import weka.core.converters.ArffLoader;

public class SimpleAttributeSelection {

    public static void main(String[] args) {

       Instances trainIns = null;

       try{

           File file= new File("C://Program Files//Weka-3-6//data//segment-challenge.arff");

           ArffLoader loader = new ArffLoader();

           loader.setFile(file);

           trainIns = loader.getDataSet();

           //在使用样本之前一定要首先设置instances的classIndex,否则在使用instances对象是会抛出异常

           trainIns.setClassIndex(trainIns.numAttributes()-1);

           Ranker rank = new Ranker();

           InfoGainAttributeEval eval = new InfoGainAttributeEval();

           eval.buildEvaluator(trainIns);

           //System.out.println(rank.search(eval, trainIns));

           int[] attrIndex = rank.search(eval, trainIns);

           StringBuffer attrIndexInfo = new StringBuffer();

           StringBuffer attrInfoGainInfo = new StringBuffer();

           attrIndexInfo.append("Selected attributes:");

           attrInfoGainInfo.append("Ranked attributes:/n");

           for(int i = 0; i < attrIndex.length; i ++){

              attrIndexInfo.append(attrIndex[i]);

              attrIndexInfo.append(",");

              attrInfoGainInfo.append(eval.evaluateAttribute(attrIndex[i]));

              attrInfoGainInfo.append("/t");

              attrInfoGainInfo.append((trainIns.attribute(attrIndex[i]).name()));

              attrInfoGainInfo.append("/n");

           }

           System.out.println(attrIndexInfo.toString());

           System.out.println(attrInfoGainInfo.toString());

       }catch(Exception e){

           e.printStackTrace();

       }

    }

}

在这个实例中,我用了InfoGain的属性选择类来进行特征选择。InfoGainAttributeEval主要是计算出各个属性的InfoGain信息。同时,weka为属性选择方法配备了搜索算法(search method),在这里我们用最简单的Ranker类,它对属性进行简单的排序。在Weka中我们还可以对搜索算法设置一些其它的属性,例如设置搜索的属性集、阈值等等,如果有需求大家可以进行详细的设置。

在最后我们打印了一些结果信息,打印了各个属性的InfoGain的信息。

继续阅读