Code/Resource
Windows Develop
Linux-Unix program
Internet-Socket-Network
Web Server
Browser Client
Ftp Server
Ftp Client
Browser Plugins
Proxy Server
Email Server
Email Client
WEB Mail
Firewall-Security
Telnet Server
Telnet Client
ICQ-IM-Chat
Search Engine
Sniffer Package capture
Remote Control
xml-soap-webservice
P2P
WEB(ASP,PHP,...)
TCP/IP Stack
SNMP
Grid Computing
SilverLight
DNS
Cluster Service
Network Security
Communication-Mobile
Game Program
Editor
Multimedia program
Graph program
Compiler program
Compress-Decompress algorithms
Crypt_Decrypt algorithms
Mathematics-Numerical algorithms
MultiLanguage
Disk/Storage
Java Develop
assembly language
Applications
Other systems
Database system
Embeded-SCM Develop
FlashMX/Flex
source in ebook
Delphi VCL
OS Develop
MiddleWare
MPI
MacOS develop
LabView
ELanguage
Software/Tools
E-Books
Artical/Document
JRip.java
Package: Weka-3-2.rar [view]
Upload User: rhdiban
Upload Date: 2013-08-09
Package Size: 15085k
Code Size: 52k
Category:
Windows Develop
Development Platform:
Java
- /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
- /*
- * JRip.java
- * Copyright (C) 2001 Xin Xu, Eibe Frank
- */
- package weka.classifiers.rules;
- import java.util.Enumeration;
- import java.util.Random;
- import java.util.Vector;
- import weka.core.FastVector;
- import weka.core.Instances;
- import weka.core.Instance;
- import weka.core.Attribute;
- import weka.core.AttributeStats;
- import weka.core.Utils;
- import weka.core.OptionHandler;
- import weka.core.Option;
- import weka.core.Copyable;
- import weka.core.WeightedInstancesHandler;
- import weka.core.AdditionalMeasureProducer;
- import weka.core.UnsupportedAttributeTypeException;
- import weka.core.UnsupportedClassTypeException;
- import weka.filters.supervised.attribute.ClassOrder;
- import weka.filters.Filter;
- import weka.classifiers.DistributionClassifier;
- import weka.classifiers.Evaluation;
- /**
- * This class implements a propositional rule learner, Repeated Incremental
- * Pruning to Produce Error Reduction (RIPPER), which is proposed by William
- * W. Cohen as an optimized version of IREP. <p>
- *
- * The algorithm is briefly described as follows: <p>
- * Initialize RS = {}, and for each class from the less prevalent one to
- * the more frequent one, DO: <p>
- *
- * 1. Building stage: repeat 1.1 and 1.2 until the description length (DL)
- * of the ruleset and examples is 64 bits greater than the smallest DL
- * met so far, or there are no positive examples, or the error rate >= 50%.
- * <p>
- * 1.1. Grow phase:<br>
- * Grow one rule by greedily adding antecedents (or conditions) to
- * the rule until the rule is perfect (i.e. 100% accurate). The
- * procedure tries every possible value of each attribute and selects
- * the condition with highest information gain: p(log(p/t)-log(P/T)).
- * <p>
- * 1.2. Prune phase:<br>
- * Incrementally prune each rule and allow the pruning of any
- * final sequences of the antecedents;<br>
- * The pruning metric is (p-n)/(p+n) -- but it's actually
- * 2p/(p+n) -1, so in this implementation we simply use p/(p+n)
- * (actually (p+1)/(p+n+2), thus if p+n is 0, it's 0.5).<p>
- *
- * 2. Optimization stage: after generating the initial ruleset {Ri},
- * generate and prune two variants of each rule Ri from randomized data
- * using procedure 1.1 and 1.2. But one variant is generated from an
- * empty rule while the other is generated by greedily adding antecedents
- * to the original rule. Moreover, the pruning metric used here is
- * (TP+TN)/(P+N).<br>
- * Then the smallest possible DL for each variant and the original rule
- * is computed. The variant with the minimal DL is selected as the final
- * representative of Ri in the ruleset. <br>
- * After all the rules in {Ri} have been examined and if there are still
- * residual positives, more rules are generated based on the residual
- * positives using Building Stage again. <p>
- *
- * 3. Delete the rules from the ruleset that would increase the DL of the
- * whole ruleset if it were in it. and add resultant ruleset to RS. <p>
- *
- * ENDDO<p>
- *
- * Note that there seem to be 2 bugs in the ripper program that would
- * affect the ruleset size and accuracy slightly. This implementation avoids
- * these bugs and thus is a little bit different from Cohen's original
- * implementation. Even after fixing the bugs, since the order of classes with
- * the same frequency is not defined in ripper, there still seems to be
- * some trivial difference between this implementation and the original ripper,
- * especially for audiology data in UCI repository, where there are lots of
- * classes of few instances.<p>
- *
- * If wrapped by other classes, typical usage of this class is:<br>
- *
- * <code>JRip rip = new JRip();
- * Instances data = ... // Data from somewhere
- * double[] orderedClasses = ... // Get the ordered class counts for the data
- * double expFPRate = ... // Calculate the expected FP/(FP+FN) rate
- * double classIndex = ... // The class index for which ruleset is built
- * // DL of default rule, no theory DL, only data DL
- * double defDL = RuleStats.dataDL(expFPRate, 0.0, data.sumOfWeights(),
- * 0.0, orderedClasses[(int)classIndex]);
- *
- * rip.rulesetForOneClass(expFPRate, data, classIndex, defDL);
- * RuleStats rulesetStats = rip.getRuleStats(0);
- *
- * // Can get heaps of information from RuleStats, e.g. combined DL,
- * // simpleStats, etc.
- * double comDL = rulesetStats.combinedDL(expFPRate, classIndex);
- * int whichRule = ... // Want simple stats of which rule?
- * double[] simpleStats = rulesetStats.getSimpleStats(whichRule);
- * ...
- * </code>
- *
- * Details please see "Fast Effective Rule Induction", William W. Cohen,
- * 'Machine Learning: Proceedings of the Twelfth International Conference'
- * (ML95). <p>
- *
- * PS. We have compared this implementation with the original ripper
- * implementation in aspects of accuracy, ruleset size and running time
- * on both artificial data "ab+bcd+defg" and UCI datasets. In all these
- * aspects it seems to be quite comparable to the original ripper
- * implementation. However, we didn't consider memory consumption
- * optimization in this implementation.<p>
- *
- * @author Xin Xu (xx5@cs.waikato.ac.nz)
- * @author Eibe Frank (eibe@cs.waikato.ac.nz)
- * @version $Revision: 1.7 $
- */
public class JRip extends DistributionClassifier
  implements OptionHandler,
             AdditionalMeasureProducer,
             WeightedInstancesHandler{

  /** The limit of description length surplus in ruleset generation
   *  (building stops once DL exceeds the minimum seen so far by this much).
   *  NOTE(review): effectively a constant -- could be declared final. */
  private static double MAX_DL_SURPLUS = 64.0;

  /** The class attribute of the data */
  private Attribute m_Class;

  /** The learned ruleset (elements are RipperRule objects) */
  private FastVector m_Ruleset;

  /** The predicted class distribution, one entry per rule in m_Ruleset */
  private FastVector m_Distributions;

  /** Number of runs of the optimization stage (default 2) */
  private int m_Optimizations = 2;

  /** Random object used for randomization in this class */
  private Random m_Random = null;

  /** Number of all the possible conditions that could appear in a rule */
  private double m_Total = 0;

  /** The seed used to initialize m_Random */
  private long m_Seed = 1;

  /** The number of folds to split data into Grow and Prune for IREP
   *  (one fold is used as the pruning set) */
  private int m_Folds = 3;

  /** The minimal number of instance weights within a split */
  private double m_MinNo = 2.0;

  /** Whether debug output is printed to System.err */
  private boolean m_Debug = false;

  /** Whether to check the error rate >= 0.5 in the stopping criterion */
  private boolean m_CheckErr = true;

  /** Whether to use pruning, i.e. whether the data is clean or not */
  private boolean m_UsePruning = true;

  /** The filter used to randomize the class order */
  private Filter m_Filter = null;

  /** The RuleStats for the ruleset of each class value */
  private FastVector m_RulesetStats;
- /**
- * Returns an enumeration describing the available options
- * Valid options are: <p>
- *
- * -F number <br>
- * The number of folds for reduced error pruning. One fold is
- * used as the pruning set. (Default: 3) <p>
- *
- * -N number <br>
- * The minimal weights of instances within a split.
- * (Default: 2) <p>
- *
- * -O number <br>
- * Set the number of runs of optimizations. (Default: 2)<p>
- *
- * -D <br>
- * Whether turn on the debug mode
- *
- * -S number <br>
- * The seed of randomization used in Ripper.(Default: 1)<p>
- *
- * -E <br>
- * Whether NOT check the error rate >= 0.5 in stopping criteria.
- * (default: check)<p>
- *
- * -P <br>
- * Whether NOT use pruning. (default: use pruning)<p>
- *
- * @return an enumeration of all the available options
- */
- public Enumeration listOptions() {
- Vector newVector = new Vector(3);
- newVector.addElement(new Option("tSet number of folds for REPn" +
- "tOne fold is used as pruning set.n" +
- "t(default 3)","F", 1, "-F <number of folds>"));
- newVector.addElement(new Option("tSet the minimal weights of instancesn" +
- "twithin a split.n" +
- "t(default 2.0)","N", 1, "-N <min. weights>"));
- newVector.addElement(new Option("tSet the number of runs ofn"+
- "toptimizations. (Default: 2)", "O",
- 1,"-O <number of runs>"));
- newVector.addElement(new Option("tSet whether turn on then"+
- "tdebug mode (Default: false)", "D",
- 0,"-D"));
- newVector.addElement(new Option("tThe seed of randomizationn"+
- "t(Default: 1)", "S",
- 1,"-S <seed>"));
- newVector.addElement(new Option("Whether NOT check the error rate>=0.5n"
- +"tin stopping criteria "
- +"t(default: check)", "E",
- 0, "-E"));
- newVector.addElement(new Option("Whether NOT use pruningn"
- +"t(default: use pruning)", "P",
- 0, "-P"));
- return newVector.elements();
- }
- /**
- * Parses a given list of options.
- *
- * @param options the list of options as an array of strings
- * @exception Exception if an option is not supported
- */
- public void setOptions(String[] options) throws Exception{
- String numFoldsString = Utils.getOption('F', options);
- if (numFoldsString.length() != 0)
- m_Folds = Integer.parseInt(numFoldsString);
- else
- m_Folds = 3;
- String minNoString = Utils.getOption('N', options);
- if (minNoString.length() != 0)
- m_MinNo = Double.parseDouble(minNoString);
- else
- m_MinNo = 2.0;
- String seedString = Utils.getOption('S', options);
- if (seedString.length() != 0)
- m_Seed = Long.parseLong(seedString);
- else
- m_Seed = 1;
- String runString = Utils.getOption('O', options);
- if (runString.length() != 0)
- m_Optimizations = Integer.parseInt(runString);
- else
- m_Optimizations = 2;
- m_Debug = Utils.getFlag('D', options);
- m_CheckErr = !Utils.getFlag('E', options);
- m_UsePruning = !Utils.getFlag('P', options);
- }
- /**
- * Gets the current settings of the Classifier.
- *
- * @return an array of strings suitable for passing to setOptions
- */
- public String [] getOptions() {
- String [] options = new String [11];
- int current = 0;
- options[current++] = "-F"; options[current++] = "" + m_Folds;
- options[current++] = "-N"; options[current++] = "" + m_MinNo;
- options[current++] = "-O"; options[current++] = "" + m_Optimizations;
- options[current++] = "-S"; options[current++] = "" + m_Seed;
- if(m_Debug)
- options[current++] = "-D";
- if(!m_CheckErr)
- options[current++] = "-E";
- if(!m_UsePruning)
- options[current++] = "-P";
- while(current < options.length)
- options[current++] = "";
- return options;
- }
- /**
- * Returns an enumeration of the additional measure names
- * @return an enumeration of the measure names
- */
- public Enumeration enumerateMeasures() {
- Vector newVector = new Vector(1);
- newVector.addElement("measureNumRules");
- return newVector.elements();
- }
- /**
- * Returns the value of the named measure
- * @param measureName the name of the measure to query for its value
- * @return the value of the named measure
- * @exception IllegalArgumentException if the named measure is not supported
- */
- public double getMeasure(String additionalMeasureName) {
- if (additionalMeasureName.compareTo("measureNumRules") == 0)
- return m_Ruleset.size();
- else
- throw new IllegalArgumentException(additionalMeasureName+" not supported (RIPPER)");
- }
  /** Sets the number of folds for REP (one fold is the pruning set). */
  public void setFolds(int fold){ m_Folds = fold; }
  /** Gets the number of folds for REP. */
  public int getFolds(){ return m_Folds; }
  /** Sets the minimal instance weight within a split. */
  public void setMinNo(double m){ m_MinNo = m; }
  /** Gets the minimal instance weight within a split. */
  public double getMinNo(){ return m_MinNo; }
  /** Sets the randomization seed. */
  public void setSeed(long s){ m_Seed = s; }
  /** Gets the randomization seed. */
  public long getSeed(){ return m_Seed; }
  /** Sets the number of optimization runs. */
  public void setOptimizations(int run){ m_Optimizations = run; }
  /** Gets the number of optimization runs. */
  public int getOptimizations(){ return m_Optimizations; }
  /** Turns debug output on or off. */
  public void setDebug(boolean d){m_Debug = d;}
  /** Whether debug output is on. */
  public boolean getDebug(){ return m_Debug; }
  /** Sets whether the error rate >= 0.5 is checked in the stopping criterion. */
  public void setCheckErrorRate(boolean d){ m_CheckErr = d;}
  /** Whether the error rate >= 0.5 is checked in the stopping criterion. */
  public boolean getCheckErrorRate(){ return m_CheckErr; }
  /** Sets whether pruning is used. */
  public void setUsePruning(boolean d){ m_UsePruning = d;}
  /** Whether pruning is used. */
  public boolean getUsePruning(){ return m_UsePruning; }

  /**
   * Get the ruleset generated by Ripper
   *
   * @return the ruleset (a FastVector of RipperRule objects)
   */
  public FastVector getRuleset(){ return m_Ruleset; }

  /**
   * Get the statistics of the ruleset in the given position
   *
   * @param pos the position of the stats, assumed to be valid
   * @return the RuleStats at that position
   */
  public RuleStats getRuleStats(int pos) {
    return (RuleStats)m_RulesetStats.elementAt(pos);
  }
  /**
   * The single antecedent in the rule, which is composed of an attribute and
   * the corresponding value. There are two inherited classes, namely
   * NumericAntd and NominalAntd, in which the attributes are numeric and
   * nominal respectively.
   */
  private abstract class Antd
    implements WeightedInstancesHandler, Copyable{

    /** The attribute of the antecedent */
    protected Attribute att;

    /** The attribute value of the antecedent.
        For a numeric attribute, value is either 0 (1st bag) or 1 (2nd bag) */
    protected double value;

    /** The maximum infoGain achieved by this antecedent test
     *  in the growing data */
    protected double maxInfoGain;

    /** The accuracy rate of this antecedent test on the growing data */
    protected double accuRate;

    /** The coverage (total weight covered) of this antecedent in the
     *  growing data */
    protected double cover;

    /** The accurately-covered weight for this antecedent in the growing data */
    protected double accu;

    /** Constructor: statistics start unset (NaN) until splitData() fills
     *  them in. */
    public Antd(Attribute a){
      att=a;
      value=Double.NaN;
      maxInfoGain = 0;
      accuRate = Double.NaN;
      cover = Double.NaN;
      accu = Double.NaN;
    }

    /** Splits the data into bags by this antecedent and computes its
     *  statistics on the growing data. */
    public abstract Instances[] splitData(Instances data, double defAcRt,
                                          double cla);

    /** Whether the given instance is covered by this antecedent. */
    public abstract boolean covers(Instance inst);

    /** Textual description of this antecedent. */
    public abstract String toString();

    /** Implements Copyable */
    public abstract Object copy();

    /* Get functions of this antecedent */
    public Attribute getAttr(){ return att; }
    public double getAttrValue(){ return value; }
    public double getMaxInfoGain(){ return maxInfoGain; }
    public double getAccuRate(){ return accuRate; }
    public double getAccu(){ return accu; }
    public double getCover(){ return cover; }
  }
  /**
   * The antecedent with numeric attribute. Covers one of two "bags"
   * separated by a split point (value 0 = first bag "&lt;=", value 1 =
   * second bag "&gt;=").
   */
  private class NumericAntd extends Antd{

    /** The split point for this numeric antecedent */
    private double splitPoint;

    /** Constructor: split point unset until splitData() runs. */
    public NumericAntd(Attribute a){
      super(a);
      splitPoint = Double.NaN;
    }

    /** Get split point of this numeric antecedent */
    public double getSplitPoint(){ return splitPoint; }

    /** Implements Copyable: copies bag choice and split point. */
    public Object copy(){
      NumericAntd na = new NumericAntd(getAttr());
      na.value = this.value;
      na.splitPoint = this.splitPoint;
      return na;
    }

    /**
     * Implements the splitData function.
     * This procedure splits the data into two bags according to the
     * information gain of the numeric attribute value; the maximum
     * infoGain is also calculated. The sweep moves instances one distinct
     * attribute value at a time from the second bag into the first and
     * evaluates the information gain of each candidate split.
     *
     * @param insts the data to be split
     * @param defAcRt the default accuracy rate for data
     * @param cl the class label to be predicted
     * @return the array of data after split, or null if every instance
     *         has a missing value for the attribute
     */
    public Instances[] splitData(Instances insts, double defAcRt,
                                 double cl){
      Instances data = insts;
      int total=data.numInstances();// Total number of instances without
                                    // missing value for att
      int split=1;                  // Current split position
      int prev=0;                   // Previous split position
      int finalSplit=split;         // Final split position
      maxInfoGain = 0;
      value = 0;

      double fstCover=0, sndCover=0, fstAccu=0, sndAccu=0;

      data.sort(att);
      // Find the last instance without missing value; sorting puts
      // missing values at the end. Initially all non-missing instances
      // form the second bag.
      for(int x=0; x<data.numInstances(); x++){
        Instance inst = data.instance(x);
        if(inst.isMissing(att)){
          total = x;
          break;
        }
        sndCover += inst.weight();
        if(Utils.eq(inst.classValue(), cl))
          sndAccu += inst.weight();
      }

      if(total == 0) return null; // Data all missing for the attribute
      splitPoint = data.instance(total-1).value(att);

      for(; split <= total; split++){
        if((split == total) ||
           (data.instance(split).value(att) > // Can't split within
            data.instance(prev).value(att))){ // same value

          // Move the instances in [prev, split) from the second bag
          // into the first bag.
          for(int y=prev; y<split; y++){
            Instance inst = data.instance(y);
            fstCover += inst.weight();
            if(Utils.eq(data.instance(y).classValue(), cl)){
              fstAccu += inst.weight(); // First bag positive# ++
            }
          }

          // Laplace-smoothed accuracy rates of the two bags.
          double fstAccuRate = (fstAccu+1.0)/(fstCover+1.0),
            sndAccuRate = (sndAccu+1.0)/(sndCover+1.0);

          /* Which bag has higher information gain? */
          boolean isFirst;
          double fstInfoGain, sndInfoGain;
          double accRate, infoGain, coverage, accurate;

          // infoGain = p * (log(p/t) - log(P/T))
          fstInfoGain =
            fstAccu*(Utils.log2(fstAccuRate)-Utils.log2(defAcRt));

          sndInfoGain =
            sndAccu*(Utils.log2(sndAccuRate)-Utils.log2(defAcRt));

          if(fstInfoGain > sndInfoGain){
            isFirst = true;
            infoGain = fstInfoGain;
            accRate = fstAccuRate;
            accurate = fstAccu;
            coverage = fstCover;
          }
          else{
            isFirst = false;
            infoGain = sndInfoGain;
            accRate = sndAccuRate;
            accurate = sndAccu;
            coverage = sndCover;
          }

          /* Check whether this is the max infoGain so far */
          if(infoGain > maxInfoGain){
            splitPoint = data.instance(prev).value(att);
            value = (isFirst) ? 0 : 1;
            accuRate = accRate;
            accu = accurate;
            cover = coverage;
            maxInfoGain = infoGain;
            finalSplit = (isFirst) ? split : prev;
          }

          // Remove the moved instances from the second bag's counts.
          for(int y=prev; y<split; y++){
            Instance inst = data.instance(y);
            sndCover -= inst.weight();
            if(Utils.eq(data.instance(y).classValue(), cl)){
              sndAccu -= inst.weight(); // Second bag positive# --
            }
          }
          prev=split;
        }
      }

      /* Split the data at the best position found */
      Instances[] splitData = new Instances[2];
      splitData[0] = new Instances(data, 0, finalSplit);
      splitData[1] = new Instances(data, finalSplit, total-finalSplit);

      return splitData;
    }

    /**
     * Whether the instance is covered by this antecedent. Instances with
     * a missing value for the attribute are never covered.
     *
     * @param inst the instance in question
     * @return the boolean value indicating whether the instance is covered
     *         by this antecedent
     */
    public boolean covers(Instance inst){
      boolean isCover=true;
      if(!inst.isMissing(att)){
        if((int)value == 0){ // First bag
          if(inst.value(att) > splitPoint)
            isCover=false;
        }
        else if(inst.value(att) < splitPoint) // Second bag
          isCover=false;
      }
      else
        isCover = false;
      return isCover;
    }

    /**
     * Prints this antecedent, e.g. "age &lt;= 30" or "age &gt;= 30".
     *
     * @return a textual description of this antecedent
     */
    public String toString() {
      String symbol = ((int)value == 0) ? " <= " : " >= ";
      return (att.name() + symbol + Utils.doubleToString(splitPoint, 6));
    }
  }
- /**
- * The antecedent with nominal attribute
- */
- private class NominalAntd extends Antd{
- /* The parameters of infoGain calculated for each attribute value
- * in the growing data */
- private double[] accurate;
- private double[] coverage;
- /* Constructor*/
- public NominalAntd(Attribute a){
- super(a);
- int bag = att.numValues();
- accurate = new double[bag];
- coverage = new double[bag];
- }
- /** Implements Copyable */
- public Object copy(){
- Antd antec = new NominalAntd(getAttr());
- antec.value = this.value;
- return antec;
- }
- /**
- * Implements the splitData function.
- * This procedure is to split the data into bags according
- * to the nominal attribute value
- * The infoGain for each bag is also calculated.
- *
- * @param data the data to be split
- * @param defAcRt the default accuracy rate for data
- * @param cl the class label to be predicted
- * @return the array of data after split
- */
- public Instances[] splitData(Instances data, double defAcRt,
- double cl){
- int bag = att.numValues();
- Instances[] splitData = new Instances[bag];
- for(int x=0; x<bag; x++){
- splitData[x] = new Instances(data, data.numInstances());
- accurate[x] = 0;
- coverage[x] = 0;
- }
- for(int x=0; x<data.numInstances(); x++){
- Instance inst=data.instance(x);
- if(!inst.isMissing(att)){
- int v = (int)inst.value(att);
- splitData[v].add(inst);
- coverage[v] += inst.weight();
- if((int)inst.classValue() == (int)cl)
- accurate[v] += inst.weight();
- }
- }
- for(int x=0; x<bag; x++){
- double t = coverage[x]+1.0;
- double p = accurate[x] + 1.0;
- double infoGain =
- //Utils.eq(defAcRt, 1.0) ?
- //accurate[x]/(double)numConds :
- accurate[x]*(Utils.log2(p/t)-Utils.log2(defAcRt));
- if(infoGain > maxInfoGain){
- maxInfoGain = infoGain;
- cover = coverage[x];
- accu = accurate[x];
- accuRate = p/t;
- value = (double)x;
- }
- }
- return splitData;
- }
- /**
- * Whether the instance is covered by this antecedent
- *
- * @param inst the instance in question
- * @return the boolean value indicating whether the instance is
- * covered by this antecedent
- */
- public boolean covers(Instance inst){
- boolean isCover=false;
- if(!inst.isMissing(att)){
- if((int)inst.value(att) == (int)value)
- isCover=true;
- }
- return isCover;
- }
- /**
- * Prints this antecedent
- *
- * @return a textual description of this antecedent
- */
- public String toString() {
- return (att.name() + " = " +att.value((int)value));
- }
- }
- /**
- * This class implements a single rule that predicts specified class.
- *
- * A rule consists of antecedents "AND"ed together and the consequent
- * (class value) for the classification.
- * In this class, the Information Gain (p*[log(p/t) - log(P/T)]) is used to
- * select an antecedent and Reduced Error Prunning (REP) with the metric
- * of accuracy rate p/(p+n) or (TP+TN)/(P+N) is used to prune the rule.
- */
- protected class RipperRule extends Rule{
- /** The internal representation of the class label to be predicted*/
- private double m_Consequent = -1;
- /** The vector of antecedents of this rule*/
- protected FastVector m_Antds = null;
- public void setConsequent(double cl){ m_Consequent = cl; }
- public double getConsequent(){ return m_Consequent; }
- /** Constructor */
- public RipperRule(){
- m_Antds = new FastVector();
- }
- /**
- * Get a shallow copy of this rule
- *
- * @return the copy
- */
- public Object copy(){
- RipperRule copy = new RipperRule();
- copy.setConsequent(getConsequent());
- copy.m_Antds = (FastVector)this.m_Antds.copyElements();
- return copy;
- }
- /**
- * Whether the instance covered by this rule
- *
- * @param inst the instance in question
- * @return the boolean value indicating whether the instance
- * is covered by this rule
- */
- public boolean covers(Instance datum){
- boolean isCover=true;
- for(int i=0; i<m_Antds.size(); i++){
- Antd antd = (Antd)m_Antds.elementAt(i);
- if(!antd.covers(datum)){
- isCover = false;
- break;
- }
- }
- return isCover;
- }
- /**
- * Whether this rule has antecedents, i.e. whether it is a default rule
- *
- * @return the boolean value indicating whether the rule has antecedents
- */
- public boolean hasAntds(){
- if (m_Antds == null)
- return false;
- else
- return (m_Antds.size() > 0);
- }
- /**
- * the number of antecedents of the rule
- *
- * @return the size of this rule
- */
- public double size(){ return (double)m_Antds.size(); }
- /**
- * Private function to compute default number of accurate instances
- * in the specified data for the consequent of the rule
- *
- * @param data the data in question
- * @return the default accuracy number
- */
- private double computeDefAccu(Instances data){
- double defAccu=0;
- for(int i=0; i<data.numInstances(); i++){
- Instance inst = data.instance(i);
- if((int)inst.classValue() == (int)m_Consequent)
- defAccu += inst.weight();
- }
- return defAccu;
- }
- /**
- * Build one rule using the growing data
- *
- * @param data the growing data used to build the rule
- * @exception if the consequent is not set yet
- */
- public void grow(Instances data) throws Exception{
- if(m_Consequent == -1)
- throw new Exception(" Consequent not set yet.");
- Instances growData = data;
- double sumOfWeights = growData.sumOfWeights();
- if(!Utils.gr(sumOfWeights, 0.0))
- return;
- /* Compute the default accurate rate of the growing data */
- double defAccu = computeDefAccu(growData);
- double defAcRt = (defAccu+1.0)/(sumOfWeights+1.0);
- /* Keep the record of which attributes have already been used*/
- boolean[] used=new boolean [growData.numAttributes()];
- for (int k=0; k<used.length; k++)
- used[k]=false;
- int numUnused=used.length;
- // If there are already antecedents existing
- for(int j=0; j < m_Antds.size(); j++){
- Antd antdj = (Antd)m_Antds.elementAt(j);
- if(!antdj.getAttr().isNumeric()){
- used[antdj.getAttr().index()]=true;
- numUnused--;
- }
- }
- double maxInfoGain;
- while (Utils.gr(growData.numInstances(), 0.0) &&
- (numUnused > 0)
- && Utils.sm(defAcRt, 1.0)
- ){
- // We require that infoGain be positive
- /*if(numAntds == originalSize)
- maxInfoGain = 0.0; // At least one condition allowed
- else
- maxInfoGain = Utils.eq(defAcRt, 1.0) ?
- defAccu/(double)numAntds : 0.0; */
- maxInfoGain = 0.0;
- /* Build a list of antecedents */
- Antd oneAntd=null;
- Instances coverData = null;
- Enumeration enumAttr=growData.enumerateAttributes();
- int index=-1;
- /* Build one condition based on all attributes not used yet*/
- while (enumAttr.hasMoreElements()){
- Attribute att= (Attribute)(enumAttr.nextElement());
- index++;
- if(m_Debug)
- System.err.println("nOne condition: size = "
- + growData.sumOfWeights());
- Antd antd =null;
- if(att.isNumeric())
- antd = new NumericAntd(att);
- else
- antd = new NominalAntd(att);
- if(!used[index]){
- /* Compute the best information gain for each attribute,
- it's stored in the antecedent formed by this attribute.
- This procedure returns the data covered by the antecedent*/
- Instances coveredData = computeInfoGain(growData, defAcRt,
- antd);
- if(coveredData != null){
- double infoGain = antd.getMaxInfoGain();
- if(m_Debug)
- System.err.println("Test of '"+antd.toString()+
- "': infoGain = "+
- infoGain + " | Accuracy = " +
- antd.getAccuRate()+
- "="+antd.getAccu()
- +"/"+antd.getCover()+
- " def. accuracy: "+defAcRt);
- if(infoGain > maxInfoGain){
- oneAntd=antd;
- coverData = coveredData;
- maxInfoGain = infoGain;
- }
- }
- }
- }
- if(oneAntd == null) break; // Cannot find antds
- if(Utils.sm(oneAntd.getAccu(), m_MinNo)) break;// Too low coverage
- //Numeric attributes can be used more than once
- if(!oneAntd.getAttr().isNumeric()){
- used[oneAntd.getAttr().index()]=true;
- numUnused--;
- }
- m_Antds.addElement(oneAntd);
- growData = coverData;// Grow data size is shrinking
- defAcRt = oneAntd.getAccuRate();
- }
- }
- /**
- * Compute the best information gain for the specified antecedent
- *
- * @param instances the data based on which the infoGain is computed
- * @param defAcRt the default accuracy rate of data
- * @param antd the specific antecedent
- * @param numConds the number of antecedents in the rule so far
- * @return the data covered by the antecedent
- */
- private Instances computeInfoGain(Instances instances, double defAcRt,
- Antd antd){
- Instances data = instances;
- /* Split the data into bags.
- The information gain of each bag is also calculated in this procedure */
- Instances[] splitData = antd.splitData(data, defAcRt,
- m_Consequent);
- /* Get the bag of data to be used for next antecedents */
- if(splitData != null)
- return splitData[(int)antd.getAttrValue()];
- else return null;
- }
- /**
- * Prune all the possible final sequences of the rule using the
- * pruning data. The measure used to prune the rule is based on
- * flag given.
- *
- * @param pruneData the pruning data used to prune the rule
- * @param useWhole flag to indicate whether use the error rate of
- * the whole pruning data instead of the data covered
- */
- public void prune(Instances pruneData, boolean useWhole){
- Instances data = pruneData;
- double total = data.sumOfWeights();
- if(!Utils.gr(total, 0.0))
- return;
- /* The default accurate # and rate on pruning data */
- double defAccu=computeDefAccu(data);
- if(m_Debug)
- System.err.println("Pruning with " + defAccu +
- " positive data out of " + total +
- " instances");
- int size=m_Antds.size();
- if(size == 0) return; // Default rule before pruning
- double[] worthRt = new double[size];
- double[] coverage = new double[size];
- double[] worthValue = new double[size];
- for(int w=0; w<size; w++){
- worthRt[w]=coverage[w]=worthValue[w]=0.0;
- }
- /* Calculate accuracy parameters for all the antecedents in this rule */
- double tn = 0.0; // True negative if useWhole
- for(int x=0; x<size; x++){
- Antd antd=(Antd)m_Antds.elementAt(x);
- Attribute attr= antd.getAttr();
- Instances newData = data;
- data = new Instances(newData, 0); // Make data empty
- for(int y=0; y<newData.numInstances(); y++){
- Instance ins=newData.instance(y);
- if(antd.covers(ins)){ // Covered by this antecedent
- coverage[x] += ins.weight();
- data.add(ins); // Add to data for further pruning
- if((int)ins.classValue() == (int)m_Consequent) // Accurate prediction
- worthValue[x] += ins.weight();
- }
- else if(useWhole){ // Not covered
- if((int)ins.classValue() != (int)m_Consequent)
- tn += ins.weight();
- }
- }
- if(useWhole){
- worthValue[x] += tn;
- worthRt[x] = worthValue[x] / total;
- }
- else // Note if coverage is 0, accuracy is 0.5
- worthRt[x] = (worthValue[x]+1.0)/(coverage[x]+2.0);
- }
- double maxValue = (defAccu+1.0)/(total+2.0);
- int maxIndex = -1;
- for(int i=0; i<worthValue.length; i++){
- if(m_Debug){
- double denom = useWhole ? total : coverage[i];
- System.err.println(i+"(useAccuray? "+!useWhole+"): "
- + worthRt[i] +
- "="+worthValue[i]+
- "/"+denom);
- }
- if(worthRt[i] > maxValue){ // Prefer to the
- maxValue = worthRt[i]; // shorter rule
- maxIndex = i;
- }
- }
- /* Prune the antecedents according to the accuracy parameters */
- for(int z=size-1;z>maxIndex;z--)
- m_Antds.removeElementAt(z);
- }
- /**
- * Prints this rule
- *
- * @param classAttr the class attribute in the data
- * @return a textual description of this rule
- */
- public String toString(Attribute classAttr) {
- StringBuffer text = new StringBuffer();
- if(m_Antds.size() > 0){
- for(int j=0; j< (m_Antds.size()-1); j++)
- text.append("(" + ((Antd)(m_Antds.elementAt(j))).toString()+ ") and ");
- text.append("("+((Antd)(m_Antds.lastElement())).toString() + ")");
- }
- text.append(" => " + classAttr.name() +
- "=" + classAttr.value((int)m_Consequent));
- return text.toString();
- }
- }
- /**
- * Builds Ripper in the order of class frequencies. For each class
- * it's built in two stages: building and optimization
- *
- * @param instances the training data
- * @exception Exception if classifier can't be built successfully
- */
- public void buildClassifier(Instances instances) throws Exception{
- if(instances.numInstances() == 0)
- throw new Exception(" No instances with a class value!");
- if (instances.checkForStringAttributes())
- throw new UnsupportedAttributeTypeException(" Cannot handle string attributes!");
- if (!instances.classAttribute().isNominal())
- throw new UnsupportedClassTypeException(" Only nominal class, please.");
- m_Random = new Random(m_Seed);
- m_Total = RuleStats.numAllConditions(instances);
- if(m_Debug)
- System.err.println("Number of all possible conditions = "+m_Total);
- Instances data = null;
- m_Filter = new ClassOrder();
- // Sth. to make the class order different each time in cross-validations
- Instance inst =
- instances.instance((int)(m_Random.nextDouble()*(double)instances.numInstances()));
- ((ClassOrder)m_Filter).setSeed((long)inst.toString().hashCode());
- ((ClassOrder)m_Filter).setClassOrder(ClassOrder.FREQ_ASCEND);
- m_Filter.setInputFormat(instances);
- data = Filter.useFilter(instances, m_Filter);
- if(data == null)
- throw new Exception(" Unable to randomize the class orders.");
- data.deleteWithMissingClass();
- if(data.numInstances() == 0)
- throw new Exception(" No instances with a class value!");
- if(data.numInstances() < m_Folds)
- throw new Exception(" Not enough data for REP.");
- m_Class = data.classAttribute();
- m_Ruleset = new FastVector();
- m_RulesetStats = new FastVector();
- m_Distributions = new FastVector();
- // Sort by classes frequency
- double[] orderedClasses = ((ClassOrder)m_Filter).getClassCounts();
- if(m_Debug){
- System.err.println("Sorted classes:");
- for(int x=0; x < m_Class.numValues(); x++)
- System.err.println(x+": "+m_Class.value(x) + " has " +
- orderedClasses[x] + " instances.");
- }
- // Iterate from less prevalent class to more frequent one
- oneClass:
- for(int y=0; y < data.numClasses()-1; y++){ // For each class
- double classIndex = (double)y;
- if(m_Debug){
- int ci = (int)classIndex;
- System.err.println("nnClass "+m_Class.value(ci)+"("+ci+"): "
- + orderedClasses[y] + "instancesn"+
- "=====================================n");
- }
- if(Utils.eq(orderedClasses[y],0.0)) // No data for this class
- continue oneClass;
- // The expected FP/err is the proportion of the class
- double all = 0;
- for(int i=y; i<orderedClasses.length; i++)
- all += orderedClasses[i];
- double expFPRate = orderedClasses[y] / all;
- double classYWeights = 0, totalWeights = 0;
- for(int j=0; j < data.numInstances(); j++){
- Instance datum = data.instance(j);
- totalWeights += datum.weight();
- if((int)datum.classValue() == y){
- classYWeights += datum.weight();
- }
- }
- // DL of default rule, no theory DL, only data DL
- double defDL;
- if(classYWeights > 0)
- defDL = RuleStats.dataDL(expFPRate,
- 0.0,
- totalWeights,
- 0.0,
- classYWeights);
- else
- continue oneClass; // Subsumed by previous rules
- if(Double.isNaN(defDL) || Double.isInfinite(defDL))
- throw new Exception("Should never happen: "+
- "defDL NaN or infinite!");
- if(m_Debug)
- System.err.println("The default DL = "+defDL);
- data = rulesetForOneClass(expFPRate, data, classIndex, defDL);
- }
- // Set the default rule
- RipperRule defRule = new RipperRule();
- defRule.setConsequent((double)(data.numClasses()-1));
- m_Ruleset.addElement(defRule);
- RuleStats defRuleStat = new RuleStats();
- defRuleStat.setData(data);
- defRuleStat.setNumAllConds(m_Total);
- defRuleStat.addAndUpdate(defRule);
- m_RulesetStats.addElement(defRuleStat);
- for(int z=0; z < m_RulesetStats.size(); z++){
- RuleStats oneClass = (RuleStats)m_RulesetStats.elementAt(z);
- for(int xyz=0; xyz < oneClass.getRulesetSize(); xyz++){
- double[] classDist = oneClass.getDistributions(xyz);
- Utils.normalize(classDist);
- if(classDist != null)
- m_Distributions.addElement(((ClassOrder)m_Filter).distributionsByOriginalIndex(classDist));
- }
- }
- }
- /**
- * Classify the test instance with the rule learner and provide
- * the class distributions
- *
- * @param datum the instance to be classified
- * @return the distribution
- */
- public double[] distributionForInstance(Instance datum){
- try{
- for(int i=0; i < m_Ruleset.size(); i++){
- RipperRule rule = (RipperRule)m_Ruleset.elementAt(i);
- if(rule.covers(datum))
- return (double[])m_Distributions.elementAt(i);
- }
- }catch(Exception e){
- System.err.println(e.getMessage());
- e.printStackTrace();
- }
- System.err.println("Should never happen!");
- return new double[datum.classAttribute().numValues()];
- }
/** Build a ruleset for the given class according to the given data.
 *  Stage 1 (building) greedily grows and prunes rules until the stopping
 *  criterion (description length / error) is met.  Stage 2 (optimization,
 *  only when pruning is enabled) revisits each rule and keeps the best of
 *  {original, revision, replacement} by relative DL, then prunes the whole
 *  ruleset to reduce total DL.
 *
 * @param expFPRate the expected FP/(FP+FN) used in DL calculation
 * @param data the given data
 * @param classIndex the given class index
 * @param defDL the default DL in the data
 * @exception Exception if the ruleset cannot be built properly
 */
protected Instances rulesetForOneClass(double expFPRate,
                                       Instances data,
                                       double classIndex,
                                       double defDL)
  throws Exception{
  Instances newData = data, growData, pruneData;
  boolean stop = false;
  FastVector ruleset = new FastVector();
  // dl tracks the running description length; minDL the best seen so far
  double dl = defDL, minDL = defDL;
  RuleStats rstats = null;
  double[] rst;   // simple stats of the last rule: see RuleStats.getSimpleStats
  // Check whether data have positive examples
  boolean defHasPositive = true; // No longer used
  boolean hasPositive = defHasPositive;
  /********************** Building stage ***********************/
  if(m_Debug)
    System.err.println("n*** Building stage ***");
  while((!stop) && hasPositive){ // Generate new rules until
                                 // stopping criteria met
    RipperRule oneRule;
    if(m_UsePruning){
      /* Split data into Grow and Prune*/
      // We should have stratified the data, but ripper seems
      // to have a bug that makes it not to do so. In order
      // to simulate it more precisely, we do the same thing.
      //newData.randomize(m_Random);
      newData = RuleStats.stratify(newData, m_Folds, m_Random);
      Instances[] part = RuleStats.partition(newData, m_Folds);
      growData=part[0];
      pruneData=part[1];
      //growData=newData.trainCV(m_Folds, m_Folds-1);
      //pruneData=newData.testCV(m_Folds, m_Folds-1);
      oneRule = new RipperRule();
      oneRule.setConsequent(classIndex); // Must set first
      if(m_Debug)
        System.err.println("nGrowing a rule ...");
      oneRule.grow(growData); // Build the rule
      if(m_Debug)
        System.err.println("One rule found before pruning:"+
                           oneRule.toString(m_Class));
      if(m_Debug)
        System.err.println("nPruning the rule ...");
      oneRule.prune(pruneData, false); // Prune the rule
      if(m_Debug)
        System.err.println("One rule found after pruning:"+
                           oneRule.toString(m_Class));
    }
    else{
      // No pruning: grow the rule on all remaining data
      oneRule = new RipperRule();
      oneRule.setConsequent(classIndex); // Must set first
      if(m_Debug)
        System.err.println("nNo pruning: growing a rule ...");
      oneRule.grow(newData); // Build the rule
      if(m_Debug)
        System.err.println("No pruning: one rule found:n"+
                           oneRule.toString(m_Class));
    }
    // Compute the DL of this ruleset
    if(rstats == null){ // First rule
      rstats = new RuleStats();
      rstats.setNumAllConds(m_Total);
      rstats.setData(newData);
    }
    rstats.addAndUpdate(oneRule);
    int last = rstats.getRuleset().size()-1; // Index of last rule
    dl += rstats.relativeDL(last, expFPRate, m_CheckErr);
    if(Double.isNaN(dl) || Double.isInfinite(dl))
      throw new Exception("Should never happen: dl in "+
                          "building stage NaN or infinite!");
    if(m_Debug)
      System.err.println("Before optimization("+last+
                         "): the dl = "+dl+" | best: "+minDL);
    if(dl < minDL)
      minDL = dl; // The best dl so far
    rst = rstats.getSimpleStats(last);
    if(m_Debug)
      System.err.println("The rule covers: "+rst[0]+
                         " | pos = " + rst[2] +
                         " | neg = " + rst[4]+
                         "nThe rule doesn't cover: "+rst[1]+
                         " | pos = " + rst[5]);
    stop = checkStop(rst, minDL, dl);
    if(!stop){
      ruleset.addElement(oneRule); // Accepted
      newData = rstats.getFiltered(last)[1];// Data not covered
      hasPositive = Utils.gr(rst[5], 0.0); // Positives remaining?
      if(m_Debug)
        System.err.println("One rule added: has positive? "
                           +hasPositive);
    }
    else{
      // Rejected: pop it from the stats so the slot can be re-used
      if(m_Debug)
        System.err.println("Quit rule");
      rstats.removeLast(); // Remove last to be re-used
    }
  }// while !stop
  /******************** Optimization stage *******************/
  RuleStats finalRulesetStat = null;
  if(m_UsePruning){
    for(int z=0; z < m_Optimizations; z++){
      if(m_Debug)
        System.err.println("n*** Optimization: run #"
                           +z+" ***");
      // Each optimization run restarts from the full data
      newData = data;
      finalRulesetStat = new RuleStats();
      finalRulesetStat.setData(newData);
      finalRulesetStat.setNumAllConds(m_Total);
      int position=0;
      stop = false;
      boolean isResidual = false;
      hasPositive = defHasPositive;
      dl = minDL = defDL;
      oneRule:
      while(!stop && hasPositive){
        isResidual = (position>=ruleset.size()); // Cover residual positive examples
        // Re-do shuffling and stratification
        //newData.randomize(m_Random);
        newData = RuleStats.stratify(newData, m_Folds, m_Random);
        Instances[] part = RuleStats.partition(newData, m_Folds);
        growData=part[0];
        pruneData=part[1];
        //growData=newData.trainCV(m_Folds, m_Folds-1);
        //pruneData=newData.testCV(m_Folds, m_Folds-1);
        RipperRule finalRule;
        if(m_Debug)
          System.err.println("nRule #"+position +
                             "| isResidual?" + isResidual+
                             "| data size: "+newData.sumOfWeights());
        if(isResidual){
          // Past the end of the existing ruleset: grow a brand-new rule
          // for any still-uncovered positive examples
          RipperRule newRule = new RipperRule();
          newRule.setConsequent(classIndex);
          if(m_Debug)
            System.err.println("nGrowing and pruning"+
                               " a new rule ...");
          newRule.grow(growData);
          newRule.prune(pruneData, false);
          finalRule = newRule;
          if(m_Debug)
            System.err.println("nNew rule found: "+
                               newRule.toString(m_Class));
        }
        else{
          RipperRule oldRule = (RipperRule)ruleset.elementAt(position);
          boolean covers = false;
          // Test coverage of the next old rule
          for(int i=0; i<newData.numInstances(); i++)
            if(oldRule.covers(newData.instance(i))){
              covers = true;
              break;
            }
          if(!covers){// Null coverage, no variants can be generated
            finalRulesetStat.addAndUpdate(oldRule);
            position++;
            continue oneRule;
          }
          // 2 variants: a fresh "replace" rule and a re-grown "revision"
          if(m_Debug)
            System.err.println("nGrowing and pruning"+
                               " Replace ...");
          RipperRule replace = new RipperRule();
          replace.setConsequent(classIndex);
          replace.grow(growData);
          // Remove the pruning data covered by the following
          // rules, then simply compute the error rate of the
          // current rule to prune it. According to Ripper,
          // it's equivalent to computing the error of the
          // whole ruleset -- is it true?
          pruneData = RuleStats.rmCoveredBySuccessives(pruneData,ruleset, position);
          replace.prune(pruneData, true);
          if(m_Debug)
            System.err.println("nGrowing and pruning"+
                               " Revision ...");
          RipperRule revision = (RipperRule)oldRule.copy();
          // For revision, first rm the data covered by the old rule
          Instances newGrowData = new Instances(growData, 0);
          for(int b=0; b<growData.numInstances(); b++){
            Instance inst = growData.instance(b);
            if(revision.covers(inst))
              newGrowData.add(inst);
          }
          revision.grow(newGrowData);
          revision.prune(pruneData, true);
          // Snapshot the stats of the rules already finalized, so the
          // variant rulesets can re-count only from this position on
          double[][] prevRuleStats = new double[position][6];
          for(int c=0; c < position; c++)
            prevRuleStats[c] = finalRulesetStat.getSimpleStats(c);
          // Now compare the relative DL of variants
          FastVector tempRules = (FastVector)ruleset.copyElements();
          tempRules.setElementAt(replace, position);
          RuleStats repStat = new RuleStats(data, tempRules);
          repStat.setNumAllConds(m_Total);
          repStat.countData(position, newData, prevRuleStats);
          //repStat.countData();
          rst = repStat.getSimpleStats(position);
          if(m_Debug)
            System.err.println("Replace rule covers: "+rst[0]+
                               " | pos = " + rst[2] +
                               " | neg = " + rst[4]+
                               "nThe rule doesn't cover: "+rst[1]+
                               " | pos = " + rst[5]);
          double repDL = repStat.relativeDL(position, expFPRate,
                                            m_CheckErr);
          if(m_Debug)
            System.err.println("nReplace: "+
                               replace.toString(m_Class)
                               +" |dl = "+repDL);
          if(Double.isNaN(repDL) || Double.isInfinite(repDL))
            throw new Exception("Should never happen: repDL"+
                                "in optmz. stage NaN or "+
                                "infinite!");
          tempRules.setElementAt(revision, position);
          RuleStats revStat = new RuleStats(data, tempRules);
          revStat.setNumAllConds(m_Total);
          revStat.countData(position, newData, prevRuleStats);
          //revStat.countData();
          double revDL = revStat.relativeDL(position, expFPRate,
                                            m_CheckErr);
          if(m_Debug)
            System.err.println("Revision: "
                               + revision.toString(m_Class)
                               +" |dl = "+revDL);
          if(Double.isNaN(revDL) || Double.isInfinite(revDL))
            throw new Exception("Should never happen: revDL"+
                                "in optmz. stage NaN or "+
                                "infinite!");
          rstats = new RuleStats(data, ruleset);
          rstats.setNumAllConds(m_Total);
          rstats.countData(position, newData, prevRuleStats);
          //rstats.countData();
          double oldDL = rstats.relativeDL(position, expFPRate,
                                           m_CheckErr);
          if(Double.isNaN(oldDL) || Double.isInfinite(oldDL))
            throw new Exception("Should never happen: oldDL"+
                                "in optmz. stage NaN or "+
                                "infinite!");
          if(m_Debug)
            System.err.println("Old rule: "+
                               oldRule.toString(m_Class)
                               +" |dl = "+oldDL);
          if(m_Debug)
            System.err.println("nrepDL: "+repDL+
                               "nrevDL: "+revDL+
                               "noldDL: "+oldDL);
          // Keep whichever variant yields the smallest relative DL;
          // ties favor the old rule, then the revision
          if((oldDL <= revDL) && (oldDL <= repDL))
            finalRule = oldRule; // Old the best
          else if(revDL <= repDL)
            finalRule = revision; // Revision the best
          else
            finalRule = replace; // Replace the best
        }
        finalRulesetStat.addAndUpdate(finalRule);
        rst = finalRulesetStat.getSimpleStats(position);
        if(isResidual){
          // Newly grown residual rules are subject to the same DL-based
          // stopping criterion as in the building stage
          dl += finalRulesetStat.relativeDL(position,
                                            expFPRate,
                                            m_CheckErr);
          if(m_Debug)
            System.err.println("After optimization: the dl"
                               +"="+dl+" | best: "+minDL);
          if(dl < minDL)
            minDL = dl; // The best dl so far
          stop = checkStop(rst, minDL, dl);
          if(!stop)
            ruleset.addElement(finalRule); // Accepted
          else{
            finalRulesetStat.removeLast(); // Remove last to be re-used
            position--;   // undo the increment below for the rejected rule
          }
        }
        else
          ruleset.setElementAt(finalRule, position); // Accepted
        if(m_Debug){
          System.err.println("The rule covers: "+rst[0]+
                             " | pos = " + rst[2] +
                             " | neg = " + rst[4]+
                             "nThe rule doesn't cover: "+rst[1]+
                             " | pos = " + rst[5]);
          System.err.println("nRuleset so far: ");
          for(int x=0; x<ruleset.size(); x++)
            System.err.println(x+": "+((RipperRule)ruleset.elementAt(x)).toString(m_Class));
          System.err.println();
        }
        //Data not covered
        if(finalRulesetStat.getRulesetSize() > 0)// If any rules
          newData = finalRulesetStat.getFiltered(position)[1];
        hasPositive = Utils.gr(rst[5], 0.0); //Positives remaining?
        position++;
      } // while !stop && hasPositive
      if(ruleset.size() > (position+1)){ // Hasn't gone through yet
        for(int k=position+1; k<ruleset.size(); k++)
          finalRulesetStat.addAndUpdate((Rule)ruleset.elementAt(k));
      }
      if(m_Debug)
        System.err.println("nDeleting rules to decrease"+
                           " DL of the whole ruleset ...");
      finalRulesetStat.reduceDL(expFPRate, m_CheckErr);
      if(m_Debug){
        int del = ruleset.size() -
          finalRulesetStat.getRulesetSize();
        System.err.println(del+" rules are deleted"+
                           " after DL reduction procedure");
      }
      ruleset = finalRulesetStat.getRuleset();
      rstats = finalRulesetStat;
    } // For each run of optimization
  } // if pruning is used
  // Concatenate the ruleset for this class to the whole ruleset
  if(m_Debug){
    System.err.println("nFinal ruleset: ");
    for(int x=0; x<ruleset.size(); x++)
      System.err.println(x+": "+((RipperRule)ruleset.elementAt(x)).toString(m_Class));
    System.err.println();
  }
  m_Ruleset.appendElements(ruleset);
  m_RulesetStats.addElement(rstats);
  if(ruleset.size() > 0)// If any rules for this class
    return rstats.getFiltered(ruleset.size()-1)[1]; // Data not
  else                                              // covered
    return data;
}
- /**
- * Check whether the stopping criterion meets
- *
- * @param rst the statistic of the ruleset
- * @param minDL the min description length so far
- * @param dl the current description length of the ruleset
- * @return true if stop criterion meets, false otherwise
- */
- private boolean checkStop(double[] rst, double minDL, double dl){
- if(dl > minDL+MAX_DL_SURPLUS){
- if(m_Debug)
- System.err.println("DL too large: "+dl+" | "+minDL);
- return true;
- }
- else if(!Utils.gr(rst[2], 0.0)){// Covered positives
- if(m_Debug)
- System.err.println("Too few positives.");
- return true;
- }
- else if((rst[4]/rst[0]) >= 0.5){// Err rate
- if(m_CheckErr){
- if(m_Debug)
- System.err.println("Error too large: "+
- rst[4] + "/" + rst[0]);
- return true;
- }
- else
- return false;
- }
- else{// Not stops
- if(m_Debug)
- System.err.println("Continue.");
- return false;
- }
- }
- /**
- * Prints the all the rules of the rule learner.
- *
- * @return a textual description of the classifier
- */
- public String toString() {
- if (m_Ruleset == null)
- return "JRIP: No model built yet.";
- StringBuffer sb = new StringBuffer("JRIP rules:n"+
- "===========nn");
- for(int j=0; j<m_RulesetStats.size(); j++){
- RuleStats rs = (RuleStats)m_RulesetStats.elementAt(j);
- FastVector rules = rs.getRuleset();
- for(int k=0; k<rules.size(); k++){
- double[] simStats = rs.getSimpleStats(k);
- sb.append(((RipperRule)rules.elementAt(k)).toString(m_Class)
- + " ("+simStats[0]+"/"+simStats[4]+")n");
- }
- }
- if(m_Debug){
- System.err.println("Inside m_Ruleset");
- for(int i=0; i<m_Ruleset.size(); i++)
- System.err.println(((RipperRule)m_Ruleset.elementAt(i)).toString(m_Class));
- }
- sb.append("nNumber of Rules : "
- + m_Ruleset.size() + "n");
- return sb.toString();
- }
- /**
- * Main method.
- *
- * @param args the options for the classifier
- */
- public static void main(String[] args) {
- try {
- System.out.println(Evaluation.evaluateModel(new JRip(), args));
- } catch (Exception e) {
- e.printStackTrace();
- System.err.println(e.getMessage());
- }
- }
- }