Code/Resource
Windows Develop
Linux-Unix program
Internet-Socket-Network
Web Server
Browser Client
Ftp Server
Ftp Client
Browser Plugins
Proxy Server
Email Server
Email Client
WEB Mail
Firewall-Security
Telnet Server
Telnet Client
ICQ-IM-Chat
Search Engine
Sniffer Package capture
Remote Control
xml-soap-webservice
P2P
WEB(ASP,PHP,...)
TCP/IP Stack
SNMP
Grid Computing
SilverLight
DNS
Cluster Service
Network Security
Communication-Mobile
Game Program
Editor
Multimedia program
Graph program
Compiler program
Compress-Decompress algrithms
Crypt_Decrypt algrithms
Mathimatics-Numerical algorithms
MultiLanguage
Disk/Storage
Java Develop
assembly language
Applications
Other systems
Database system
Embeded-SCM Develop
FlashMX/Flex
source in ebook
Delphi VCL
OS Develop
MiddleWare
MPI
MacOS develop
LabView
ELanguage
Software/Tools
E-Books
Artical/Document
C45ModelSelection.java
Package: Weka-3-2.rar [view]
Upload User: rhdiban
Upload Date: 2013-08-09
Package Size: 15085k
Code Size: 5k
Category:
Windows Develop
Development Platform:
Java
- /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
- /*
- * C45ModelSelection.java
- * Copyright (C) 1999 Eibe Frank
- *
- */
- package weka.classifiers.trees.j48;
- import java.util.*;
- import weka.core.*;
- /**
- * Class for selecting a C4.5-type split for a given dataset.
- *
- * @author Eibe Frank (eibe@cs.waikato.ac.nz)
- * @version $Revision: 1.7 $y
- */
- public class C45ModelSelection extends ModelSelection {
- /** Minimum number of objects in interval. */
- private int m_minNoObj;
- /** All the training data */
- private Instances m_allData; //
- /**
- * Initializes the split selection method with the given parameters.
- *
- * @param minNoObj minimum number of instances that have to occur in at least two
- * subsets induced by split
- * @param allData FULL training dataset (necessary for
- * selection of split points).
- */
- public C45ModelSelection(int minNoObj, Instances allData) {
- m_minNoObj = minNoObj;
- m_allData = allData;
- }
- /**
- * Sets reference to training data to null.
- */
- public void cleanup() {
- m_allData = null;
- }
- /**
- * Selects C4.5-type split for the given dataset.
- */
- public final ClassifierSplitModel selectModel(Instances data){
- double minResult;
- double currentResult;
- C45Split [] currentModel;
- C45Split bestModel = null;
- NoSplit noSplitModel = null;
- double averageInfoGain = 0;
- int validModels = 0;
- boolean multiVal = true;
- Distribution checkDistribution;
- Attribute attribute;
- double sumOfWeights;
- int i;
- try{
- // Check if all Instances belong to one class or if not
- // enough Instances to split.
- checkDistribution = new Distribution(data);
- noSplitModel = new NoSplit(checkDistribution);
- if (Utils.sm(checkDistribution.total(),2*m_minNoObj) ||
- Utils.eq(checkDistribution.total(),
- checkDistribution.perClass(checkDistribution.maxClass())))
- return noSplitModel;
- // Check if all attributes are nominal and have a
- // lot of values.
- if (m_allData != null) {
- Enumeration enum = data.enumerateAttributes();
- while (enum.hasMoreElements()) {
- attribute = (Attribute) enum.nextElement();
- if ((attribute.isNumeric()) ||
- (Utils.sm((double)attribute.numValues(),
- (0.3*(double)m_allData.numInstances())))){
- multiVal = false;
- break;
- }
- }
- }
- currentModel = new C45Split[data.numAttributes()];
- sumOfWeights = data.sumOfWeights();
- // For each attribute.
- for (i = 0; i < data.numAttributes(); i++){
- // Apart from class attribute.
- if (i != (data).classIndex()){
- // Get models for current attribute.
- currentModel[i] = new C45Split(i,m_minNoObj,sumOfWeights);
- currentModel[i].buildClassifier(data);
- // Check if useful split for current attribute
- // exists and check for enumerated attributes with
- // a lot of values.
- if (currentModel[i].checkModel())
- if (m_allData != null) {
- if ((data.attribute(i).isNumeric()) ||
- (multiVal || Utils.sm((double)data.attribute(i).numValues(),
- (0.3*(double)m_allData.numInstances())))){
- averageInfoGain = averageInfoGain+currentModel[i].infoGain();
- validModels++;
- }
- } else {
- averageInfoGain = averageInfoGain+currentModel[i].infoGain();
- validModels++;
- }
- }else
- currentModel[i] = null;
- }
- // Check if any useful split was found.
- if (validModels == 0)
- return noSplitModel;
- averageInfoGain = averageInfoGain/(double)validModels;
- // Find "best" attribute to split on.
- minResult = 0;
- for (i=0;i<data.numAttributes();i++){
- if ((i != (data).classIndex()) &&
- (currentModel[i].checkModel()))
- // Use 1E-3 here to get a closer approximation to the original
- // implementation.
- if ((currentModel[i].infoGain() >= (averageInfoGain-1E-3)) &&
- Utils.gr(currentModel[i].gainRatio(),minResult)){
- bestModel = currentModel[i];
- minResult = currentModel[i].gainRatio();
- }
- }
- // Check if useful split was found.
- if (Utils.eq(minResult,0))
- return noSplitModel;
- // Add all Instances with unknown values for the corresponding
- // attribute to the distribution for the model, so that
- // the complete distribution is stored with the model.
- bestModel.distribution().
- addInstWithUnknown(data,bestModel.attIndex());
- // Set the split point analogue to C45 if attribute numeric.
- if (m_allData != null)
- bestModel.setSplitPoint(m_allData);
- return bestModel;
- }catch(Exception e){
- e.printStackTrace();
- }
- return null;
- }
- /**
- * Selects C4.5-type split for the given dataset.
- */
- public final ClassifierSplitModel selectModel(Instances train, Instances test) {
- return selectModel(train);
- }
- }