Java实现的决策树算法完整实例-eolink官网

Java实现的决策树算法完整实例

本文实例讲述了java实现的决策树算法。分享给大家供大家参考，具体如下：

决策树算法是一种逼近离散函数值的方法。它是一种典型的分类方法，首先对数据进行处理，利用归纳算法生成可读的规则和决策树，然后使用决策对新数据进行分析。本质上决策树是通过一系列规则对数据进行分类的过程。

决策树构造可以分两步进行。第一步，决策树的生成：由训练样本集生成决策树的过程。一般情况下，训练样本数据集是根据实际需要有历史的、有一定综合程度的，用于数据分析处理的数据集。第二步，决策树的剪枝：决策树的剪枝是对上一阶段生成的决策树进行检验、校正和修正的过程，主要是用新的样本数据集（称为测试数据集）中的数据校验决策树生成过程中产生的初步规则，将那些影响预测准确性的分枝剪除。

java实现代码如下：

package demo;

import java.util.HashMap;

import java.util.LinkedList;

import java.util.List;

import java.util.Map;

import java.util.Map.Entry;

import java.util.Set;

/**
 * A small decision-tree builder over a hard-coded, already-classified sample
 * set. At every node the attribute whose split needs the least remaining
 * information (i.e. yields the largest information gain) is chosen as the
 * test attribute.
 *
 * <p>A decision tree is represented as a plain {@code Object}: either a
 * {@link Tree} (non-leaf node) or a category value (leaf).
 */
public class DicisionTree {

    public static void main(String[] args) throws Exception {
        System.out.print("我们测试结果：");
        String[] attrNames = new String[] { "AGE", "INCOME", "STUDENT",
                "CREDIT_RATING" };

        // Read the sample set.
        Map<Object, List<Sample>> samples = readSamples(attrNames);

        // Build the decision tree.
        Object decisionTree = generateDecisionTree(samples, attrNames);

        // Print the decision tree.
        outputDecisionTree(decisionTree, 0, null);
    }

    /**
     * Reads the classified sample set.
     *
     * @param attrNames attribute names, in the same order as the leading
     *                  columns of each raw data row
     * @return map: category -> list of the samples belonging to that category
     */
    static Map<Object, List<Sample>> readSamples(String[] attrNames) {

        // Sample attributes plus category (the last element of each row is
        // the category the sample belongs to).
        Object[][] rawData = new Object[][] {
                { "<30 ", "High ", "No ", "Fair ", "0" },
                { "<30 ", "High ", "No ", "Excellent", "0" },
                { "30-40", "High ", "No ", "Fair ", "1" },
                { ">40 ", "Medium", "No ", "Fair ", "1" },
                { ">40 ", "Low ", "Yes", "Fair ", "1" },
                { ">40 ", "Low ", "Yes", "Excellent", "0" },
                { "30-40", "Low ", "Yes", "Excellent", "1" },
                { "<30 ", "Medium", "No ", "Fair ", "0" },
                { "<30 ", "Low ", "Yes", "Fair ", "1" },
                { ">40 ", "Medium", "Yes", "Fair ", "1" },
                { "<30 ", "Medium", "Yes", "Excellent", "1" },
                { "30-40", "Medium", "No ", "Excellent", "1" },
                { "30-40", "High ", "Yes", "Fair ", "1" },
                { ">40 ", "Medium", "No ", "Excellent", "0" } };

        // Turn every row into a Sample and group the samples by category.
        Map<Object, List<Sample>> ret = new HashMap<Object, List<Sample>>();
        for (Object[] row : rawData) {
            Sample sample = new Sample();
            int i = 0;
            for (int n = row.length - 1; i < n; i++) {
                sample.setAttribute(attrNames[i], row[i]);
            }
            // After the loop i indexes the last column, i.e. the category.
            sample.setCategory(row[i]);
            List<Sample> samples = ret.get(row[i]);
            if (samples == null) {
                samples = new LinkedList<Sample>();
                ret.put(row[i], samples);
            }
            samples.add(sample);
        }

        return ret;
    }

    /**
     * Builds a decision tree recursively.
     *
     * @param categoryToSamples map: category -> samples of that category
     * @param attrNames         attributes still available as test attributes
     * @return a {@link Tree} for a non-leaf node, or the category value for a
     *         leaf
     */
    static Object generateDecisionTree(
            Map<Object, List<Sample>> categoryToSamples, String[] attrNames) {

        // Only one category left: use it as the classification.
        if (categoryToSamples.size() == 1) {
            return categoryToSamples.keySet().iterator().next();
        }

        // No attribute left to decide on: vote, i.e. pick the category that
        // holds the most samples.
        if (attrNames.length == 0) {
            int max = 0;
            Object maxCategory = null;
            for (Entry<Object, List<Sample>> entry : categoryToSamples
                    .entrySet()) {
                int cur = entry.getValue().size();
                if (cur > max) {
                    max = cur;
                    maxCategory = entry.getKey();
                }
            }
            return maxCategory;
        }

        // Choose the test attribute.
        Object[] rst = chooseBestTestAttribute(categoryToSamples, attrNames);

        // Root of this (sub)tree; its branch attribute is the chosen one.
        Tree tree = new Tree(attrNames[(Integer) rst[0]]);

        // An attribute that has been used must not be chosen again below.
        String[] subA = new String[attrNames.length - 1];
        for (int i = 0, j = 0; i < attrNames.length; i++) {
            if (i != (Integer) rst[0]) {
                subA[j++] = attrNames[i];
            }
        }

        // Create one branch per value of the branch attribute.
        // rst[2] is the split map produced by chooseBestTestAttribute.
        @SuppressWarnings("unchecked")
        Map<Object, Map<Object, List<Sample>>> splits =
                (Map<Object, Map<Object, List<Sample>>>) rst[2];
        for (Entry<Object, Map<Object, List<Sample>>> entry : splits
                .entrySet()) {
            Object attrValue = entry.getKey();
            Map<Object, List<Sample>> split = entry.getValue();
            Object child = generateDecisionTree(split, subA);
            tree.setChild(attrValue, child);
        }

        return tree;
    }

    /**
     * Chooses the best test attribute. "Best" means that, when branching on
     * the attribute, the weighted sum of the information still needed in each
     * branch to classify a new sample is minimal, which is equivalent to the
     * attribute having the largest information gain.
     *
     * @param categoryToSamples map: category -> samples of that category
     * @param attrNames         candidate attribute names
     * @return array of: index of the chosen attribute (Integer, -1 if none),
     *         its information sum (Double), and the split
     *         Map(attribute value -> (category -> sample list))
     */
    static Object[] chooseBestTestAttribute(
            Map<Object, List<Sample>> categoryToSamples, String[] attrNames) {

        int minIndex = -1; // index of the best attribute
        double minValue = Double.MAX_VALUE; // smallest information sum so far
        Map<Object, Map<Object, List<Sample>>> minSplits = null; // best split

        // For every attribute compute the information sum obtained when it is
        // used as the test attribute, and keep the smallest one.
        for (int attrIndex = 0; attrIndex < attrNames.length; attrIndex++) {
            int allCount = 0; // counts all samples

            // Build for the current attribute:
            // attribute value -> (category -> sample list)
            Map<Object, Map<Object, List<Sample>>> curSplits =
                    new HashMap<Object, Map<Object, List<Sample>>>();
            for (Entry<Object, List<Sample>> entry : categoryToSamples
                    .entrySet()) {
                Object category = entry.getKey();
                List<Sample> samples = entry.getValue();
                for (Sample sample : samples) {
                    Object attrValue = sample
                            .getAttribute(attrNames[attrIndex]);
                    Map<Object, List<Sample>> split = curSplits.get(attrValue);
                    if (split == null) {
                        split = new HashMap<Object, List<Sample>>();
                        curSplits.put(attrValue, split);
                    }
                    List<Sample> splitSamples = split.get(category);
                    if (splitSamples == null) {
                        splitSamples = new LinkedList<Sample>();
                        split.put(category, splitSamples);
                    }
                    splitSamples.add(sample);
                }
                allCount += samples.size();
            }

            // Weighted sum, over all branches, of the information needed to
            // classify a new sample inside the branch.
            double curValue = 0.0; // accumulator over branches
            for (Map<Object, List<Sample>> splits : curSplits.values()) {
                double perSplitCount = 0;
                for (List<Sample> list : splits.values()) {
                    perSplitCount += list.size(); // samples in this branch
                }
                double perSplitValue = 0.0; // accumulator for this branch
                for (List<Sample> list : splits.values()) {
                    // Lists are never empty here, so p > 0 and log(p) is finite.
                    double p = list.size() / perSplitCount;
                    perSplitValue -= p * (Math.log(p) / Math.log(2));
                }
                curValue += (perSplitCount / allCount) * perSplitValue;
            }

            // Keep the smallest; on ties the earlier attribute wins.
            if (minValue > curValue) {
                minIndex = attrIndex;
                minValue = curValue;
                minSplits = curSplits;
            }
        }

        return new Object[] { minIndex, minValue, minSplits };
    }

    /**
     * Prints a decision tree to standard output.
     *
     * @param obj   a {@link Tree} or a leaf category value
     * @param level depth of {@code obj}; controls the indentation prefix
     * @param from  label of the branch leading to {@code obj}, or
     *              {@code null} for the root
     */
    static void outputDecisionTree(Object obj, int level, Object from) {
        for (int i = 0; i < level; i++) {
            System.out.print("|-----");
        }
        if (from != null) {
            System.out.printf("(%s):", from);
        }
        if (obj instanceof Tree) {
            Tree tree = (Tree) obj;
            String attrName = tree.getAttribute();
            System.out.printf("[%s = ?]\n", attrName);
            for (Object attrValue : tree.getAttributeValues()) {
                Object child = tree.getChild(attrValue);
                outputDecisionTree(child, level + 1, attrName + " = "
                        + attrValue);
            }
        } else {
            System.out.printf("[CATEGORY = %s]\n", obj);
        }
    }

    /**
     * A sample: a set of named attributes plus the category value the sample
     * belongs to.
     */
    static class Sample {

        private Map<String, Object> attributes = new HashMap<String, Object>();

        private Object category;

        public Object getAttribute(String name) {
            return attributes.get(name);
        }

        public void setAttribute(String name, Object value) {
            attributes.put(name, value);
        }

        public Object getCategory() {
            return category;
        }

        public void setCategory(Object category) {
            this.category = category;
        }

        @Override
        public String toString() {
            return attributes.toString();
        }
    }

    /**
     * A decision tree (non-leaf node). Every non-leaf node roots a decision
     * tree: it holds one branch attribute and one branch per value of that
     * attribute, each branch leading to a sub-tree or a leaf category.
     */
    static class Tree {

        private String attribute;

        private Map<Object, Object> children = new HashMap<Object, Object>();

        public Tree(String attribute) {
            this.attribute = attribute;
        }

        public String getAttribute() {
            return attribute;
        }

        public Object getChild(Object attrValue) {
            return children.get(attrValue);
        }

        public void setChild(Object attrValue, Object child) {
            children.put(attrValue, child);
        }

        public Set<Object> getAttributeValues() {
            return children.keySet();
        }
    }
}

运行结果：

更多关于java算法相关内容感兴趣的读者可查看本站专题：《Java数据结构与算法教程》、《Java操作DOM节点技巧总结》、《Java文件与目录操作技巧汇总》和《Java缓存操作技巧汇总》

希望本文所述对大家java程序设计有所帮助。

Iterator与ListIterator接口在java中的区别有哪些

586 2023-03-14

Java实现的决策树算法完整实例

Flask接口签名sign原理与实例代码浅析

vue项目接口域名动态的获取方法

Iterator与ListIterator接口在java中的区别有哪些

推荐文章

接口调用是什么意思？几种常用接口调用方式

接口设计原则

8款在线 API 接口文档管理工具

api管理系统是什么？

什么是接口调试？接口调试的步骤有哪些？

api 接口管理系统有哪些？

接口测试有几种测试方法

API文档生成工具有哪些？

微服务和api网关区别

交换机配置步骤

最近发表

热评文章

在线接口文档管理工具推荐，支持在线测试，HTTP接口

开源的在线接口文档wiki工具Mindoc的介绍与使

如何优雅的进行接口设计？接口设计的六大原则是什么？

什么是API测试,api检测公司

软件接口设计怎么做？前后端分离软件接口设计思路

接口管理平台推荐，几大接口管理平台总有一款适合你！