在掌握基于Weka工具的数据挖掘(分类、回归、聚类、关联规则分析)应用的基础上,实现基于Weka API的数据挖掘程序设计。
1.下载安装JDK 7.0 64位版,Weka 3.7版,Eclipse IDE for Java Developers 4.0以上版本。
2.基于Weka API的数据分类。
3.基于Weka API的数据回归。
4.基于Weka API的数据聚类。
5.基于Weka API的关联规则分析。
(1)JDK与Weka的安装方法与实验1中相同。
(2)从http://www.eclipse.org/home/index.php 下载并安装Eclipse。
(3)在Eclipse中建立一个新的Java工程,用于放置实验程序的源代码。
(4)编程请遵循Java编程规范。规范中文版参见:
http://www.hawstein.com/posts/google-java-style.html。
(a)将数值型字段规范化至[0,1]区间。
(b)调用特征选择算法(Select attributes),选择关键特征。
运行结果如下:



代码如下:
package weka;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.Random;
import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.BestFirst;
import weka.attributeSelection.CfsSubsetEval;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.evaluation.Evaluation;
import weka.classifiers.functions.MultilayerPerceptron;
import weka.classifiers.trees.J48;
import weka.classifiers.trees.RandomForest;
import weka.core.Attribute;
import weka.core.Instances;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Discretize;
import weka.filters.unsupervised.attribute.Normalize;
import weka.filters.unsupervised.attribute.NumericToNominal;
import weka.filters.unsupervised.attribute.Remove;
/**
 * 	二、基于Weka API的数据分类
 * @author 西瓜不甜
 *
 */
public class Test_1 {
	
	
	public static void main(String[] args) throws Exception {
		
	/**
	* 1.读取"电费回收数据.csv"
	*/
		
	Instances data = new Instances(new BufferedReader(new FileReader("C:\\Users\\西瓜不甜\\Desktop\\workspace\\weka\\电费回收数据.arff")));
		
        
        
        /**
         * 2.数据预处理
         * (1)删除无用属性
         */
        String[] options = new String[2];
        options[0] = "-R";                                    // remove
        options[1] = "1-2,7,10,12";                           // "CONS_NO", "YMD", "RCVED_DATE", "CUISHOU_COUNT", "YM"
        Remove remove = new Remove();                         
        remove.setOptions(options);                          
        remove.setInputFormat(data);                     	
        data = Filter.useFilter(data, remove);   	  		// apply filter
        
        data.setClassIndex(data.numAttributes() - 1);  		// set class
 
        /**
         * 2.数据预处理
         * (2)归一化
         */
        Normalize norm = new Normalize();  
        norm.setInputFormat(data);  
        data = Filter.useFilter(data, norm);  
        
        /**
         * 2.数据预处理
         * (3)特征选择
         */
        AttributeSelection attSelect = new AttributeSelection();
        CfsSubsetEval eval = new CfsSubsetEval();
        BestFirst search = new BestFirst();
        attSelect.setEvaluator(eval);
        attSelect.setSearch(search);
        attSelect.SelectAttributes(data);
        
        int[] indices = attSelect.selectedAttributes();
        System.out.print("\n Select Attributes : starting with (0): " + Utils.arrayToString(indices) + ‘\t‘);
        
        for (int i = 0; i <indices.length; i++) {
        	int index = indices[i];
        	Attribute attribute = data.attribute(index); 
        	System.out.print(attribute.name() + " ");
        }
        System.out.println();  
        
        
        /**
         * 2.数据预处理
         * (4) 离散化
         */
        Discretize disc = new Discretize();
        disc.setInputFormat(data);
        data = Filter.useFilter(data, disc);
        
        
        /**
         * 2.数据预处理
         * (5) 类型转换
         */
        NumericToNominal nutono = new NumericToNominal();
        nutono.setInputFormat(data);
        data = Filter.useFilter(data, nutono);
        
        
        /**
         * 3.数据分类
         * (0) 取60%为训练集,40%为测试集
         */
        data.randomize(new Random(0));
		int trainSize = (int) Math.round(data.numInstances() * 0.60);
		int testSize = data.numInstances() - trainSize;
		Instances train = new Instances(data, 0, trainSize);
		Instances test = new Instances(data, trainSize, testSize);
		
        /**
         * 3.数据分类
         * (1)决策树(J48)
         */
		
		
        J48 j48 = new J48();							// train classifier
        j48.buildClassifier(train);
        
        Evaluation evaluation;
        evaluation = new Evaluation(train);				// evaluate classifier and print some statistics
        evaluation.evaluateModel(j48, test);
        System.out.println(evaluation.toSummaryString("\n======J48 : Results======\n", false));
        System.out.println(evaluation.toClassDetailsString());
        System.out.println(evaluation.toMatrixString());
        
        
        /**
         * 3.数据分类
         * (2)随机森林(RandomForest)
         */
        RandomForest rf = new RandomForest();			// train classifier
        rf.buildClassifier(train);
        
        evaluation = new Evaluation(train);				// evaluate classifier and print some statistics
        evaluation.evaluateModel(rf, test);
        System.out.println(evaluation.toSummaryString("\n======RandomForest : Results======\n", false));
        System.out.println(evaluation.toClassDetailsString());
        System.out.println(evaluation.toMatrixString());
        
        
        /**
         * 3.数据分类
         * (3)神经网络(MultilayerPerceptron)
         * ps:运行时间约4h,谨慎运行。
         */
        MultilayerPerceptron mp = new MultilayerPerceptron();			// train classifier
        mp.buildClassifier(train);
        
        evaluation = new Evaluation(train);				// evaluate classifier and print some statistics
        evaluation.evaluateModel(mp, test);
        System.out.println(evaluation.toSummaryString("\n======MultilayerPerceptron : Results======\n", false));
        System.out.println(evaluation.toClassDetailsString());
        System.out.println(evaluation.toMatrixString());
        
        /**
         * 3.数据分类
         * (4)朴素贝叶斯(NaiveBayes)
         */
        NaiveBayes nb = new NaiveBayes();			// train classifier
        nb.buildClassifier(train);
        
        evaluation = new Evaluation(train);				// evaluate classifier and print some statistics
        evaluation.evaluateModel(nb, test);
        System.out.println(evaluation.toSummaryString("\n======NaiveBayes : Results======\n", false));
        System.out.println(evaluation.toClassDetailsString());
        System.out.println(evaluation.toMatrixString());
	}
}
(a)将数值型字段规范化至[0,1]区间。
(b)调用特征选择算法(Select attributes),选择关键特征。
运行结果如下:



原文:https://www.cnblogs.com/xgbt/p/13334026.html